2019
Li, Zhuliu; Zhang, Wei; Huang, R Stephanie; Kuang, Rui
Learning a Low-rank Tensor of Pharmacogenomic Multi-relations from Biomedical Networks Proceedings Article
In: IEEE International Conference on Data Mining, 2019.
Abstract | Links | BibTeX | Tags: Cancer Genomics, Multi-relational learning, Tensor Completion
@inproceedings{GTCORP2019b,
title = {Learning a Low-rank Tensor of Pharmacogenomic Multi-relations from Biomedical Networks},
author = {Zhuliu Li and Wei Zhang and R Stephanie Huang and Rui Kuang},
url = {http://compbio.cs.umn.edu/08970888.pdf},
year = {2019},
date = {2019-08-31},
urldate = {2019-08-31},
booktitle = {IEEE International Conference on Data Mining},
abstract = {Learning pharmacogenomic multi-relations among diseases, genes and chemicals from content-rich biomedical and biological networks can provide important guidance for drug discovery, drug repositioning and disease treatment. Most of the existing methods focus on imputing missing values in the disease-gene, disease-chemical and gene-chemical pairwise relations from the observed relations instead of being designed for learning high-order disease-gene-chemical multi-relations. To achieve the goal, we propose a general tensor-based optimization framework and a scalable Graph-Regularized Tensor Completion from Observed Pairwise Relations (GT-COPR) algorithm to infer the multi-relations among the entities across multiple networks in a low-rank tensor, based on manifold regularization with the graph Laplacian of a Cartesian, tensor or strong product of the networks, and consistencies between the collapsed tensors and the observed bipartite relations. Our theoretical analyses also prove the convergence and efficiency of GT-COPR. In the experiments, the tensor fiber-wise and slice-wise evaluations demonstrate the accuracy of GT-COPR for predicting the disease-gene-chemical associations across the large-scale protein-protein interactions network, chemical structural similarity network and phenotype-based human disease network; and the validation on Genomics of Drug Sensitivity in Cancer cell line dataset shows a potential clinical application of GT-COPR for learning disease-specific chemical-gene interactions. Statistical enrichment analysis demonstrates that GT-COPR is also capable of producing both topologically and biologically relevant disease, gene and chemical components with high significance.
Source code: https://github.com/kuanglab/GT-COPR},
keywords = {Cancer Genomics, Multi-relational learning, Tensor Completion},
pubstate = {published},
tppubtype = {inproceedings}
}
Source code: https://github.com/kuanglab/GT-COPR
2017
Zhang, Wei; Chien, Jeremy; Yong, Jeongsik; Kuang, Rui
Network-based Machine Learning and Graph Theory Algorithms for Precision Oncology Journal Article
In: NPJ Precision Oncology, no. 25, 2017.
Abstract | Links | BibTeX | Tags: Cancer Genomics, Phenome-genome Association, Protein-Protein Interaction Network
@article{networkreview2017,
title = {Network-based Machine Learning and Graph Theory Algorithms for Precision Oncology},
author = {Wei Zhang and Jeremy Chien and Jeongsik Yong and Rui Kuang},
url = {https://www.nature.com/articles/s41698-017-0029-7},
doi = {10.1038/s41698-017-0029-7},
year = {2017},
date = {2017-08-08},
urldate = {2017-08-08},
journal = {NPJ Precision Oncology},
number = {25},
abstract = {Network-based analytics plays an increasingly important role in precision oncology. Growing evidence in recent studies suggests that cancer can be better understood through mutated or dysregulated pathways or networks rather than individual mutations and that the efficacy of repositioned drugs can be inferred from disease modules in molecular networks. This article reviews network-based machine learning and graph theory algorithms for integrative analysis of personal genomic data and biomedical knowledge bases to identify tumor-specific molecular mechanisms, candidate targets and repositioned drugs for personalized treatment. The review focuses on the algorithmic design and mathematical formulation of these methods to facilitate applications and implementations of network-based analysis in the practice of precision oncology. We review the methods applied in three scenarios to integrate genomic data and network models in different analysis pipelines, and we examine three categories of network-based approaches for repositioning drugs in drug-disease-gene networks. In addition, we perform a comprehensive subnetwork/pathway analysis of mutations in 31 cancer genome projects in the Cancer Genome Atlas (TCGA) and present a detailed case study on ovarian cancer. Finally, we discuss interesting observations, potential pitfalls and future directions in network-based precision oncology.},
keywords = {Cancer Genomics, Phenome-genome Association, Protein-Protein Interaction Network},
pubstate = {published},
tppubtype = {article}
}
2015
Zhang, Wei; Chang, Jae-Woong; Lin, Lilong; Minn, Kay; Wu, Baolin; Chien, Jeremy; Yong, Jeongsik; Zheng, Hui; Kuang, Rui
Network-based Isoform Quantification with RNA-Seq Data for Cancer Transcriptome Analysis Journal Article
In: PLoS Computational Biology, pp. e1004465, 2015.
Abstract | Links | BibTeX | Tags: Cancer Genomics, Isoform Quantification
@article{Net-RSTQ,
title = {Network-based Isoform Quantification with {RNA-Seq} Data for Cancer Transcriptome Analysis},
author = {Wei Zhang and Jae-Woong Chang and Lilong Lin and Kay Minn and Baolin Wu and Jeremy Chien and Jeongsik Yong and Hui Zheng and Rui Kuang},
url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004465},
doi = {10.1371/journal.pcbi.1004465},
year = {2015},
date = {2015-12-23},
urldate = {2015-12-23},
journal = {PLoS Computational Biology},
pages = {e1004465},
abstract = {New sequencing technologies for transcriptome-wide profiling of RNAs have greatly promoted the interest in isoform-based functional characterizations of a cellular system. Elucidation of gene expressions at the isoform resolution could lead to new molecular mechanisms such as gene-regulations and alternative splicings, and potentially better molecular signals for phenotype predictions. However, it could be overly optimistic to derive the proportion of the isoforms of a gene solely based on short read alignments. Inherently, systematical sampling biases from RNA library preparation and ambiguity of read origins in overlapping isoforms pose a problem in reliability. The work in this paper exams the possibility of using protein domain-domain interactions as prior knowledge in isoform transcript quantification. We first made the observation that protein domain-domain interactions positively correlate with isoform co-expressions in TCGA data and then designed a probabilistic EM approach to integrate domain-domain interactions with short read alignments for estimation of isoform proportions. Validated by qRT-PCR experiments on three cell lines, simulations and classifications of TCGA patient samples in several cancer types, Net-RSTQ is proven a useful tool for isoform-based analysis in functional genomes and systems biology.},
keywords = {Cancer Genomics, Isoform Quantification},
pubstate = {published},
tppubtype = {article}
}
2013
Zhang, Huanan; Tian, Ze; Kuang, Rui
Transfer learning across cancers on DNA copy number variation analysis Proceedings Article
In: 2013 IEEE 13th International Conference on Data Mining, pp. 1283–1288, IEEE, 2013, ISBN: 978-0-7695-5108-1.
Abstract | Links | BibTeX | Tags: Cancer Genomics, Transfer Learning
@inproceedings{zhang2013transfer,
title = {Transfer learning across cancers on {DNA} copy number variation analysis},
author = {Huanan Zhang and Ze Tian and Rui Kuang},
url = {http://compbio.cs.umn.edu/wp-content/uploads/2017/10/TLFL-10Page.pdf},
doi = {10.1109/ICDM.2013.58},
isbn = {978-0-7695-5108-1},
year = {2013},
date = {2013-12-07},
urldate = {2013-12-07},
booktitle = {2013 IEEE 13th International Conference on Data Mining},
pages = {1283--1288},
publisher = {IEEE},
abstract = {DNA copy number variations (CNVs) are prevalent in all types of tumors. It is still a challenge to study how CNVs play a role in driving tumorgenic mechanisms that are either universal or specific in different cancer types. To address the problem, we introduce a transfer learning framework to discover common CNVs shared across different tumor types as well as CNVs specific to each tumor type from genome-wide CNV data measured by array CGH and SNP genotyping array. The proposed model, namely Transfer Learning with Fused LASSO (TLFL), detects latent CNV components from multiple CNV datasets of different tumor types to distinguish the CNVs that are common across the datasets and those that are specific in each dataset. Both the common and type-specific CNVs are detected as latent components in matrix factorization coupled with fused LASSO on adjacent CNV probe features. TLFL considers the common latent components underlying the multiple datasets to transfer knowledge across different tumor types. In simulations and experiments on real cancer CNV datasets, TLFL detected better latent components that can be used as features to improve classification of patient samples in each individual dataset compared with the model without the knowledge transfer. In cross-dataset analysis on bladder cancer and cross-domain analysis on breast cancer and ovarian cancer, TLFL also learned latent CNV components that are both predictive of tumor stages and correlate with known cancer genes.},
keywords = {Cancer Genomics, Transfer Learning},
pubstate = {published},
tppubtype = {inproceedings}
}
DNA copy number variations (CNVs) are prevalent in all types of tumors. It is still a challenge to study how CNVs play a role in driving tumorgenic mechanisms that are either universal or specific in different cancer types. To address the problem, we introduce a transfer learning framework to discover common CNVs shared across different tumor types as well as CNVs specific to each tumor type from genome-wide CNV data measured by array CGH and SNP genotyping array. The proposed model, namely Transfer Learning with Fused LASSO (TLFL), detects latent CNV components from multiple CNV datasets of different tumor types to distinguish the CNVs that are common across the datasets and those that are specific in each dataset. Both the common and type-specific CNVs are detected as latent components in matrix factorization coupled with fused LASSO on adjacent CNV probe features. TLFL considers the common latent components underlying the multiple datasets to transfer knowledge across different tumor types. In simulations and experiments on real cancer CNV datasets, TLFL detected better latent components that can be used as features to improve classification of patient samples in each individual dataset compared with the model without the knowledge transfer. In cross-dataset analysis on bladder cancer and cross-domain analysis on breast cancer and ovarian cancer, TLFL also learned latent CNV components that are both predictive of tumor stages and correlate with known cancer genes.
Chien, Jeremy; Kuang, Rui; Landen, Charles; Shridhar, Viji
Platinum-sensitive recurrence in ovarian cancer: the role of tumor microenvironment Journal Article
In: Frontiers in Oncology, vol. 3, pp. 251, 2013.
Abstract | Links | BibTeX | Tags: Cancer Genomics
@article{chien2013platinumb,
title = {Platinum-sensitive recurrence in ovarian cancer: the role of tumor microenvironment},
author = {Jeremy Chien and Rui Kuang and Charles Landen and Viji Shridhar},
url = {http://journal.frontiersin.org/article/10.3389/fonc.2013.00251/full},
doi = {10.3389/fonc.2013.00251},
year = {2013},
date = {2013-09-23},
urldate = {2013-09-23},
journal = {Frontiers in Oncology},
volume = {3},
pages = {251},
publisher = {Frontiers},
abstract = {Despite several advances in the understanding of ovarian cancer pathobiology, in terms of driver genetic alterations in high-grade serous cancer, histologic heterogeneity of epithelial ovarian cancer, cell-of-origin for ovarian cancer, the survival rate from ovarian cancer is disappointingly low when compared to that of breast or prostate cancer. One of the factors contributing to the poor survival rate from ovarian cancer is the development of chemotherapy resistance following several rounds of chemotherapy. Although unicellular drug resistance mechanisms contribute to chemotherapy resistance, tumor microenvironment and the extracellular matrix (ECM), in particular, is emerging as a significant determinant of a tumor's response to chemotherapy. In this review, we discuss the potential role of the tumor microenvironment in ovarian cancer recurrence and resistance to chemotherapy. Finally, we propose an alternative view of platinum-sensitive recurrence to describe a potential role of the ECM in the process.},
keywords = {Cancer Genomics},
pubstate = {published},
tppubtype = {article}
}
Zhang, Wei; Ota, Takayo; Shridhar, Viji; Chien, Jeremy; Wu, Baolin; Kuang, Rui
Network-based survival analysis reveals subnetwork signatures for predicting outcomes of ovarian cancer treatment Journal Article
In: PLoS Comput Biol, vol. 9, no. 3, pp. e1002975, 2013.
Abstract | Links | BibTeX | Tags: Cancer Genomics, Survival Analysis
@article{zhang2013network,
title = {Network-based survival analysis reveals subnetwork signatures for predicting outcomes of ovarian cancer treatment},
author = {Wei Zhang and Takayo Ota and Viji Shridhar and Jeremy Chien and Baolin Wu and Rui Kuang},
url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1002975},
doi = {10.1371/journal.pcbi.1002975},
year = {2013},
date = {2013-03-21},
urldate = {2013-03-21},
journal = {PLoS Computational Biology},
volume = {9},
number = {3},
pages = {e1002975},
publisher = {Public Library of Science},
abstract = {Cox regression is commonly used to predict the outcome by the time to an event of interest and in addition, identify relevant features for survival analysis in cancer genomics. Due to the high-dimensionality of high-throughput genomic data, existing Cox models trained on any particular dataset usually generalize poorly to other independent datasets. In this paper, we propose a network-based Cox regression model called Net-Cox and applied Net-Cox for a large-scale survival analysis across multiple ovarian cancer datasets. Net-Cox integrates gene network information into the Cox's proportional hazard model to explore the co-expression or functional relation among high-dimensional gene expression features in the gene network. Net-Cox was applied to analyze three independent gene expression datasets including the TCGA ovarian cancer dataset and two other public ovarian cancer datasets. Net-Cox with the network information from gene co-expression or functional relations identified highly consistent signature genes across the three datasets, and because of the better generalization across the datasets, Net-Cox also consistently improved the accuracy of survival prediction over the Cox models regularized by L1-norm or L2-norm. This study focused on analyzing the death and recurrence outcomes in the treatment of ovarian carcinoma to identify signature genes that can more reliably predict the events. The signature genes comprise dense protein-protein interaction subnetworks, enriched by extracellular matrix receptors and modulators or by nuclear signaling components downstream of extracellular signal-regulated kinases.
In the laboratory validation of the signature genes, a tumor array experiment by protein staining on an independent patient cohort from Mayo Clinic showed that the protein expression of the signature gene FBN1 is a biomarker significantly associated with the early recurrence after 12 months of the treatment in the ovarian cancer patients who are initially sensitive to chemotherapy. Net-Cox toolbox is available at http://compbio.cs.umn.edu/Net-Cox/.},
keywords = {Cancer Genomics, Survival Analysis},
pubstate = {published},
tppubtype = {article}
}
2009
Tian, Ze; Hwang, TaeHyun; Kuang, Rui
A hypergraph-based learning algorithm for classifying gene expression and arrayCGH data with prior knowledge Journal Article
In: Bioinformatics, vol. 25, no. 21, pp. 2831–2838, 2009, ISSN: 1460-2059.
Abstract | Links | BibTeX | Tags: Cancer Genomics
@article{tian2009hypergraph,
title = {A hypergraph-based learning algorithm for classifying gene expression and {arrayCGH} data with prior knowledge},
author = {Ze Tian and TaeHyun Hwang and Rui Kuang},
url = {http://bioinformatics.oxfordjournals.org/content/25/21/2831.short},
doi = {10.1093/bioinformatics/btp467},
issn = {1460-2059},
year = {2009},
date = {2009-07-27},
urldate = {2009-07-27},
journal = {Bioinformatics},
volume = {25},
number = {21},
pages = {2831--2838},
publisher = {Oxford Univ Press},
abstract = {Motivation: Incorporating biological prior knowledge into predictive models is a challenging data integration problem in analyzing high-dimensional genomic data. We introduce a hypergraph-based semi-supervised learning algorithm called HyperPrior to classify gene expression and array-based comparative genomic hybridization (arrayCGH) data using biological knowledge as constraints on graph-based learning. HyperPrior is a robust two-step iterative method that alternatively finds the optimal labeling of the samples and the optimal weighting of the features, guided by constraints encoding prior knowledge. The prior knowledge for analyzing gene expression data is that cancer-related genes tend to interact with each other in a protein–protein interaction network. Similarly, the prior knowledge for analyzing arrayCGH data is that probes that are spatially nearby in their layout along the chromosomes tend to be involved in the same amplification or deletion event. Based on the prior knowledge, HyperPrior imposes a consistent weighting of the correlated genomic features in graph-based learning.
Results: We applied HyperPrior to test two arrayCGH datasets and two gene expression datasets for both cancer classification and biomarker identification. On all the datasets, HyperPrior achieved competitive classification performance, compared with SVMs and the other baselines utilizing the same prior knowledge. HyperPrior also identified several discriminative regions on chromosomes and discriminative subnetworks in the PPI, both of which contain cancer-related genomic elements. Our results suggest that HyperPrior is promising in utilizing biological prior knowledge to achieve better classification performance and more biologically interpretable findings in gene expression and arrayCGH data.},
keywords = {Cancer Genomics},
pubstate = {published},
tppubtype = {article}
}
Results: We applied HyperPrior to test two arrayCGH datasets and two gene expression datasets for both cancer classification and biomarker identification. On all the datasets, HyperPrior achieved competitive classification performance, compared with SVMs and the other baselines utilizing the same prior knowledge. HyperPrior also identified several discriminative regions on chromosomes and discriminative subnetworks in the PPI, both of which contain cancer-related genomic elements. Our results suggest that HyperPrior is promising in utilizing biological prior knowledge to achieve better classification performance and more biologically interpretable findings in gene expression and arrayCGH data.
2008
Hwang, TaeHyun; Tian, Ze; Kuang, Rui; Kocher, Jean-Pierre
Learning on weighted hypergraphs to integrate protein interactions and gene expressions for cancer outcome prediction Proceedings Article
In: 2008 Eighth IEEE International Conference on Data Mining, pp. 293–302, IEEE 2008, ISBN: 978-0-7695-3502-9.
Abstract | Links | BibTeX | Tags: Cancer Genomics, Protein-Protein Interaction Network
@inproceedings{hwang2008learning,
title = {Learning on weighted hypergraphs to integrate protein interactions and gene expressions for cancer outcome prediction},
author = {TaeHyun Hwang and Ze Tian and Rui Kuang and Jean-Pierre Kocher},
url = {http://compbio.cs.umn.edu/wp-content/uploads/2017/10/HyperGene.pdf},
doi = {10.1109/ICDM.2008.37},
isbn = {978-0-7695-3502-9},
year = {2008},
date = {2008-12-15},
urldate = {2008-12-15},
booktitle = {2008 Eighth IEEE International Conference on Data Mining},
pages = {293--302},
organization = {IEEE},
abstract = {Building reliable predictive models from multiple complementary genomic data for cancer study is a crucial step towards successful cancer treatment and a full understanding of the underlying biological principles. To tackle this challenging data integration problem, we propose a hypergraph-based learning algorithm called HyperGene to integrate microarray gene expressions and protein-protein interactions for cancer outcome prediction and biomarker identification. HyperGene is a robust two-step iterative method that alternatively finds the optimal outcome prediction and the optimal weighting of the marker genes guided by a protein-protein interaction network. Under the hypothesis that cancer-related genes tend to interact with each other, the HyperGene algorithm uses a protein-protein interaction network as prior knowledge by imposing a consistent weighting of interacting genes. Our experimental results on two large-scale breast cancer gene expression datasets show that HyperGene utilizing a curated protein-protein interaction network achieves significantly improved cancer outcome prediction. Moreover, HyperGene can also retrieve many known cancer genes as highly weighted marker genes.},
keywords = {Cancer Genomics, Protein-Protein Interaction Network},
pubstate = {published},
tppubtype = {inproceedings}
}
Building reliable predictive models from multiple complementary genomic data for cancer study is a crucial step towards successful cancer treatment and a full understanding of the underlying biological principles. To tackle this challenging data integration problem, we propose a hypergraph-based learning algorithm called HyperGene to integrate microarray gene expressions and protein-protein interactions for cancer outcome prediction and biomarker identification. HyperGene is a robust two-step iterative method that alternatively finds the optimal outcome prediction and the optimal weighting of the marker genes guided by a protein-protein interaction network. Under the hypothesis that cancer-related genes tend to interact with each other, the HyperGene algorithm uses a protein-protein interaction network as prior knowledge by imposing a consistent weighting of interacting genes. Our experimental results on two large-scale breast cancer gene expression datasets show that HyperGene utilizing a curated protein-protein interaction network achieves significantly improved cancer outcome prediction. Moreover, HyperGene can also retrieve many known cancer genes as highly weighted marker genes.
Hwang, TaeHyun; Sicotte, Hugues; Tian, Ze; Wu, Baolin; Kocher, Jean-Pierre; Wigle, Dennis A; Kumar, Vipin; Kuang, Rui
Robust and efficient identification of biomarkers by classifying features on graphs Journal Article
In: Bioinformatics, vol. 24, no. 18, pp. 2023–2029, 2008, ISSN: 1460-2059.
Abstract | Links | BibTeX | Tags: Cancer Genomics
@article{hwang2008robustb,
title = {Robust and efficient identification of biomarkers by classifying features on graphs},
author = {Hwang, TaeHyun and Sicotte, Hugues and Tian, Ze and Wu, Baolin and Kocher, Jean-Pierre and Wigle, Dennis A and Kumar, Vipin and Kuang, Rui},
url = {http://bioinformatics.oxfordjournals.org/content/24/18/2023.short},
doi = {10.1093/bioinformatics/btn383},
issn = {1460-2059},
year = {2008},
date = {2008-01-01},
urldate = {2008-01-01},
journal = {Bioinformatics},
volume = {24},
number = {18},
pages = {2023--2029},
publisher = {Oxford Univ Press},
abstract = {Motivation: A central problem in biomarker discovery from large-scale gene expression or single nucleotide polymorphism (SNP) data is the computational challenge of taking into account the dependence among all the features. Methods that ignore the dependence usually identify non-reproducible biomarkers across independent datasets. We introduce a new graph-based semi-supervised feature classification algorithm to identify discriminative disease markers by learning on bipartite graphs. Our algorithm directly classifies the feature nodes in a bipartite graph as positive, negative or neutral with network propagation to capture the dependence among both samples and features (clinical and genetic variables) by exploring bi-cluster structures in a graph. Two features of our algorithm are: (1) our algorithm can find a global optimal labeling to capture the dependence among all the features and thus, generates highly reproducible results across independent microarray or other high-thoughput datasets, (2) our algorithm is capable of handling hundreds of thousands of features and thus, is particularly useful for biomarker identification from high-throughput gene expression and SNP data. In addition, although designed for classifying features, our algorithm can also simultaneously classify test samples for disease prognosis/diagnosis.
Results: We applied the network propagation algorithm to study three large-scale breast cancer datasets. Our algorithm achieved competitive classification performance compared with SVMs and other baseline methods, and identified several markers with clinical or biological relevance with the disease. More importantly, our algorithm also identified highly reproducible marker genes and enriched functions from the independent datasets.
Availability: Supplementary results and source code are available at http://compbio.cs.umn.edu/Feature_Class.},
keywords = {Cancer Genomics},
pubstate = {published},
tppubtype = {article}
}
Results: We applied the network propagation algorithm to study three large-scale breast cancer datasets. Our algorithm achieved competitive classification performance compared with SVMs and other baseline methods, and identified several markers with clinical or biological relevance with the disease. More importantly, our algorithm also identified highly reproducible marker genes and enriched functions from the independent datasets.
Availability: Supplementary results and source code are available at http://compbio.cs.umn.edu/Feature_Class.