2020
Petegrosso, Raphael; Song, Tianci; Kuang, Rui
Hierarchical Canonical Correlation Analysis Reveals Phenotype, Genotype, and Geoclimate Associations in Plants Journal Article
In: Plant Phenomics, vol. 2020, no. 1969142, 2020.
Links | BibTeX | Tags: Phenome-genome Association
% Journal article in Plant Phenomics (2020). tppubtype is not a standard
% BibTeX field; it is kept verbatim (presumably read by the tool that
% generated this listing -- confirm before removing).
@article{Petegrosso2020,
  author    = {Raphael Petegrosso and Tianci Song and Rui Kuang},
  title     = {Hierarchical Canonical Correlation Analysis Reveals Phenotype, Genotype, and Geoclimate Associations in Plants},
  journal   = {Plant Phenomics},
  volume    = {2020},
  number    = {1969142},
  year      = {2020},
  date      = {2020-03-31},
  doi       = {10.34133/2020/1969142},
  url       = {https://spj.sciencemag.org/plantphenomics/2020/1969142/cta/},
  keywords  = {Phenome-genome Association},
  pubstate  = {published},
  tppubtype = {article}
}
2017
Zhang, Wei; Chien, Jeremy; Yong, Jeongsik; Kuang, Rui
Network-based Machine Learning and Graph Theory Algorithms for Precision Oncology Journal Article
In: NPJ Precision Oncology, no. 25, 2017.
Abstract | Links | BibTeX | Tags: Phenome-genome Association, Protein-Protein Interaction Network
% Review article in NPJ Precision Oncology (2017). The doi field holds the
% bare identifier only; styles and link generators add the resolver prefix.
@article{networkreview2017,
  author    = {Wei Zhang and Jeremy Chien and Jeongsik Yong and Rui Kuang},
  title     = {Network-based Machine Learning and Graph Theory Algorithms for Precision Oncology},
  journal   = {NPJ Precision Oncology},
  number    = {25},
  year      = {2017},
  date      = {2017-08-08},
  doi       = {10.1038/s41698-017-0029-7},
  url       = {https://www.nature.com/articles/s41698-017-0029-7},
  abstract  = {Network-based analytics plays an increasingly important role in precision oncology. Growing evidence in recent studies suggests that cancer can be better understood through mutated or dysregulated pathways or networks rather than individual mutations and that the efficacy of repositioned drugs can be inferred from disease modules in molecular networks. This article reviews network-based machine learning and graph theory algorithms for integrative analysis of personal genomic data and biomedical knowledge bases to identify tumor-specific molecular mechanisms, candidate targets and repositioned drugs for personalized treatment. The review focuses on the algorithmic design and mathematical formulation of these methods to facilitate applications and implementations of network-based analysis in the practice of precision oncology. We review the methods applied in three scenarios to integrate genomic data and network models in different analysis pipelines, and we examine three categories of network-based approaches for repositioning drugs in drug-disease-gene networks. In addition, we perform a comprehensive subnetwork/pathway analysis of mutations in 31 cancer genome projects in the Cancer Genome Atlas (TCGA) and present a detailed case study on ovarian cancer. Finally, we discuss interesting observations, potential pitfalls and future directions in network-based precision oncology.},
  keywords  = {Phenome-genome Association, Protein-Protein Interaction Network},
  pubstate  = {published},
  tppubtype = {article}
}
2016
Petegrosso, Raphael; Park, Sunho; Hwang, Tae Hyun; Kuang, Rui
Transfer Learning across Ontologies for Phenome-Genome Association Prediction Journal Article
In: Bioinformatics, vol. 33, no. 4, pp. 529–536, 2016.
Abstract | Links | BibTeX | Tags: Phenome-genome Association, Transfer Learning
% Journal article in Bioinformatics, vol. 33 no. 4. The page range is written
% with a double hyphen so styles typeset it as an en dash.
% NOTE(review): the Availability URL inside the abstract points at localhost;
% confirm the public address before publishing this entry.
@article{petegrosso2016transfer,
  author    = {Raphael Petegrosso and Sunho Park and Tae Hyun Hwang and Rui Kuang},
  title     = {Transfer Learning across Ontologies for Phenome-Genome Association Prediction},
  journal   = {Bioinformatics},
  volume    = {33},
  number    = {4},
  pages     = {529--536},
  publisher = {Oxford Univ Press},
  year      = {2016},
  date      = {2016-11-23},
  doi       = {10.1093/bioinformatics/btw649},
  url       = {http://bioinformatics.oxfordjournals.org/content/early/2016/10/20/bioinformatics.btw649.abstract},
  abstract  = {Motivation: To better predict and analyze gene associations with the collection of phenotypes organized in a phenotype ontology, it is crucial to effectively model the hierarchical structure among the phenotypes in the ontology and leverage the sparse known associations with additional training information. In this paper, we first introduce Dual Label Propagation (DLP) to impose consistent associations with the entire phenotype paths in predicting phenotype-gene associations in Human Phenotype Ontology (HPO). DLP is then used as the base model in a transfer learning framework (tlDLP) to incorporate functional annotations in Gene Ontology (GO). By simultaneously reconstructing GO term-gene associations and HPO phenotype-gene associations for all the genes in a protein-protein interaction network, tlDLP benefits from the enriched training associations indirectly through relation with GO terms.
Results: In the experiments to predict the associations between human genes and phenotypes in HPO based on human protein-protein interaction network, both DLP and tlDLP improved the prediction of gene associations with phenotype paths in HPO in cross-validation and the prediction of the most recent associations added after the snapshot of the training data. Moreover, the transfer learning through GO term-gene associations significantly improved association predictions for the phenotypes with no more specific known associations by a large margin. Examples are also shown to demonstrate how phenotype paths in phenotype ontology and transfer learning with gene ontology can improve the predictions.
Availability: Source code is available at http://localhost/~raphaelpetegrosso/wpcb/ontophenome.},
  keywords  = {Phenome-genome Association, Transfer Learning},
  pubstate  = {published},
  tppubtype = {article}
}
Results: In the experiments to predict the associations between human genes and phenotypes in HPO based on human protein-protein interaction network, both DLP and tlDLP improved the prediction of gene associations with phenotype paths in HPO in cross-validation and the prediction of the most recent associations added after the snapshot of the training data. Moreover, the transfer learning through GO term-gene associations significantly improved association predictions for the phenotypes with no more specific known associations by a large margin. Examples are also shown to demonstrate how phenotype paths in phenotype ontology and transfer learning with gene ontology can improve the predictions.
Availability: Source code is available at http://localhost/~raphaelpetegrosso/wpcb/ontophenome.
2015
Xie, MaoQiang; Xu, YingJie; Zhang, YaoGong; Hwang, TaeHyun; Kuang, Rui
Network-based Phenome-Genome Association Prediction by Bi-Random Walk Journal Article
In: PLOS ONE, vol. 10, no. 5, pp. e0125138, 2015.
Abstract | Links | BibTeX | Tags: Phenome-genome Association
% Journal article in PLOS ONE, vol. 10 no. 5. The pages field holds the
% article identifier (e0125138) rather than a page range.
@article{xie2015network,
  author    = {MaoQiang Xie and YingJie Xu and YaoGong Zhang and TaeHyun Hwang and Rui Kuang},
  title     = {Network-based Phenome-Genome Association Prediction by Bi-Random Walk},
  journal   = {PLOS ONE},
  volume    = {10},
  number    = {5},
  pages     = {e0125138},
  publisher = {Public Library of Science},
  year      = {2015},
  date      = {2015-05-01},
  doi       = {10.1371/journal.pone.0125138},
  url       = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0125138},
  abstract  = {The availability of ontologies and systematic documentations of phenotypes and their genetic associations has enabled large-scale network-based global analyses of the association between the complete collection of phenotypes (phenome) and genes. To provide a fundamental understanding of how the network information is relevant to phenotype-gene associations, we analyze the circular bigraphs (CBGs) in OMIM human disease phenotype-gene association network and MGI mouse phenotype-gene association network, and introduce a bi-random walk (BiRW) algorithm to capture the CBG patterns in the networks for unveiling human and mouse phenome-genome association. BiRW performs separate random walk simultaneously on gene interaction network and phenotype similarity network to explore gene paths and phenotype paths in CBGs of different sizes to summarize their associations as predictions.},
  keywords  = {Phenome-genome Association},
  pubstate  = {published},
  tppubtype = {article}
}
2012
Hwang, TaeHyun; Atluri, Gowtham; Xie, MaoQiang; Dey, Sanjoy; Hong, Changjin; Kumar, Vipin; Kuang, Rui
Co-clustering phenome--genome for phenotype classification and disease gene discovery Journal Article
In: Nucleic Acids Research, vol. 40, no. 19, pp. e146, 2012.
Abstract | Links | BibTeX | Tags: Phenome-genome Association
% Journal article in Nucleic Acids Research, vol. 40 no. 19. The pages field
% holds the single article identifier e146, not a range.
@article{hwang2012co,
  author    = {TaeHyun Hwang and Gowtham Atluri and MaoQiang Xie and Sanjoy Dey and Changjin Hong and Vipin Kumar and Rui Kuang},
  title     = {Co-clustering phenome--genome for phenotype classification and disease gene discovery},
  journal   = {Nucleic Acids Research},
  volume    = {40},
  number    = {19},
  pages     = {e146},
  publisher = {Oxford Univ Press},
  year      = {2012},
  date      = {2012-06-26},
  doi       = {10.1093/nar/gks615},
  url       = {http://nar.oxfordjournals.org/content/40/19/e146.short},
  abstract  = {Understanding the categorization of human diseases is critical for reliably identifying disease causal genes. Recently, genome-wide studies of abnormal chromosomal locations related to diseases have mapped >2000 phenotype–gene relations, which provide valuable information for classifying diseases and identifying candidate genes as drug targets. In this article, a regularized non-negative matrix tri-factorization (R-NMTF) algorithm is introduced to co-cluster phenotypes and genes, and simultaneously detect associations between the detected phenotype clusters and gene clusters. The R-NMTF algorithm factorizes the phenotype–gene association matrix under the prior knowledge from phenotype similarity network and protein–protein interaction network, supervised by the label information from known disease classes and biological pathways. In the experiments on disease phenotype–gene associations in OMIM and KEGG disease pathways, R-NMTF significantly improved the classification of disease phenotypes and disease pathway genes compared with support vector machines and Label Propagation in cross-validation on the annotated phenotypes and genes. The newly predicted phenotypes in each disease class are highly consistent with human phenotype ontology annotations. The roles of the new member genes in the disease pathways are examined and validated in the protein–protein interaction subnetworks. Extensive literature review also confirmed many new members of the disease classes and pathways as well as the predicted associations between disease phenotype classes and pathways.},
  keywords  = {Phenome-genome Association},
  pubstate  = {published},
  tppubtype = {article}
}
Xie, Maoqiang; Hwang, Taehyun; Kuang, Rui
Prioritizing disease genes by bi-random walk Proceedings Article
In: Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 292–303, Springer 2012.
Abstract | Links | BibTeX | Tags: Phenome-genome Association
% Conference paper (PAKDD, 2012). Springer is recorded in the publisher
% field; organization is reserved for a sponsoring body.
@inproceedings{xie2012prioritizing,
  author    = {Maoqiang Xie and Taehyun Hwang and Rui Kuang},
  title     = {Prioritizing disease genes by bi-random walk},
  booktitle = {Pacific-Asia Conference on Knowledge Discovery and Data Mining},
  pages     = {292--303},
  publisher = {Springer},
  year      = {2012},
  date      = {2012-05-29},
  doi       = {10.1007/978-3-642-30220-6_25},
  url       = {http://compbio.cs.umn.edu/wp-content/uploads/2017/10/PAKDD2012.pdf},
  abstract  = {Random walk methods have been successfully applied to prioritizing disease causal genes. In this paper, we propose a bi-random walk algorithm (BiRW) based on a regularization framework for graph matching to globally prioritize disease genes for all phenotypes simultaneously. While previous methods perform random walk either on the protein-protein interaction network or the complete phenome-genome heterogenous network, BiRW performs random walk on the Kronecker product graph between the protein-protein interaction network and the phenotype similarity network. Three variations of BiRW that perform balanced or unbalanced bi-directional random walks are analyzed and compared with other random walk methods. Experiments on analyzing the disease phenotype-gene associations in Online Mendelian Inheritance in Man (OMIM) demonstrate that BiRW effectively improved disease gene prioritization over existing methods by ranking more known associations in the top 100 out of nearly 10,000 candidate genes.},
  keywords  = {Phenome-genome Association},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2011
Hwang, TaeHyun; Zhang, Wei; Xie, Maoqiang; Liu, Jinfeng; Kuang, Rui
Inferring disease and gene set associations with rank coherence in networks Journal Article
In: Bioinformatics, vol. 27, no. 19, pp. 2692–2699, 2011.
Abstract | Links | BibTeX | Tags: Phenome-genome Association
% Journal article in Bioinformatics, vol. 27 no. 19, pages 2692--2699.
% The abstract spans two physical lines (Motivation / Results) and is kept
% verbatim.
@article{hwang2011inferring,
  author    = {TaeHyun Hwang and Wei Zhang and Maoqiang Xie and Jinfeng Liu and Rui Kuang},
  title     = {Inferring disease and gene set associations with rank coherence in networks},
  journal   = {Bioinformatics},
  volume    = {27},
  number    = {19},
  pages     = {2692--2699},
  publisher = {Oxford Univ Press},
  year      = {2011},
  date      = {2011-08-02},
  doi       = {10.1093/bioinformatics/btr463},
  url       = {http://bioinformatics.oxfordjournals.org/content/27/19/2692},
  abstract  = {Motivation: To validate the candidate disease genes identified from high-throughput genomic studies, a necessary step is to elucidate the associations between the set of candidate genes and disease phenotypes. The conventional gene set enrichment analysis often fails to reveal associations between disease phenotypes and the gene sets with a short list of poorly annotated genes, because the existing annotations of disease-causative genes are incomplete. This article introduces a network-based computational approach called rcNet to discover the associations between gene sets and disease phenotypes. A learning framework is proposed to maximize the coherence between the predicted phenotype–gene set relations and the known disease phenotype-gene associations. An efficient algorithm coupling ridge regression with label propagation and two variants are designed to find the optimal solution to the objective functions of the learning framework.
Results: We evaluated the rcNet algorithms with leave-one-out cross-validation on Online Mendelian Inheritance in Man (OMIM) data and an independent test set of recently discovered disease–gene associations. In the experiments, the rcNet algorithms achieved best overall rankings compared with the baselines. To further validate the reproducibility of the performance, we applied the algorithms to identify the target diseases of novel candidate disease genes obtained from recent studies of Genome-Wide Association Study (GWAS), DNA copy number variation analysis and gene expression profiling. The algorithms ranked the target disease of the candidate genes at the top of the rank list in many cases across all the three case studies.},
  keywords  = {Phenome-genome Association},
  pubstate  = {published},
  tppubtype = {article}
}
Results: We evaluated the rcNet algorithms with leave-one-out cross-validation on Online Mendelian Inheritance in Man (OMIM) data and an independent test set of recently discovered disease–gene associations. In the experiments, the rcNet algorithms achieved best overall rankings compared with the baselines. To further validate the reproducibility of the performance, we applied the algorithms to identify the target diseases of novel candidate disease genes obtained from recent studies of Genome-Wide Association Study (GWAS), DNA copy number variation analysis and gene expression profiling. The algorithms ranked the target disease of the candidate genes at the top of the rank list in many cases across all the three case studies.
2010
Hwang, TaeHyun; Kuang, Rui
A Heterogeneous Label Propagation Algorithm for Disease Gene Discovery Proceedings Article
In: Proceedings of the SIAM International Conference on Data Mining, pp. 583, Society for Industrial and Applied Mathematics 2010.
Abstract | Links | BibTeX | Tags: Phenome-genome Association
% Conference paper (SIAM International Conference on Data Mining, 2010).
% booktitle holds only the proceedings title; the society name lives in
% publisher.
% NOTE(review): pages lists a single page (583) -- confirm whether an end
% page is missing.
@inproceedings{hwang2010heterogeneous,
  author    = {TaeHyun Hwang and Rui Kuang},
  title     = {A Heterogeneous Label Propagation Algorithm for Disease Gene Discovery},
  booktitle = {Proceedings of the SIAM International Conference on Data Mining},
  pages     = {583},
  publisher = {Society for Industrial and Applied Mathematics},
  year      = {2010},
  date      = {2010-04-29},
  doi       = {10.1137/1.9781611972801.51},
  url       = {http://compbio.cs.umn.edu/wp-content/uploads/2017/10/SDM2010.pdf},
  abstract  = {Label propagation is an effective and efficient technique to utilize local and global features in a network for semi-supervised learning. In the literature, one challenge is how to propagate information in heterogeneous networks comprising several subnetworks, each of which has its own cluster structures that need to be explored independently. In this paper, we introduce an intuitive algorithm MINProp (Mutual Interaction-based Network Propagation) and a simple regularization framework for propagating information between subnetworks in a heterogeneous network. MINProp sequentially performs label propagation on each individual subnetwork with the current label information derived from the other subnetworks and repeats this step until convergence to the global optimal solution to the convex objective function of the regularization framework. The independent label propagation on each subnetwork explores the cluster structure in the subnetwork. The label information from the other subnetworks is used to capture mutual interactions (bicluster structures) between the vertices in each pair of the subnetworks. MINProp algorithm is applied to disease gene discovery from a heterogeneous network of disease phenotypes and genes. In the experiments, MINProp significantly outperformed the original label propagation algorithm on a single network and the state-of-the-art methods for discovering disease genes. The results also suggest that MINProp is more effective in utilizing the modular structures in a heterogeneous network. Finally, MINProp discovered new disease-gene associations that are only reported recently.},
  keywords  = {Phenome-genome Association},
  pubstate  = {published},
  tppubtype = {inproceedings}
}