2017
Zhang, Huanan; Roe, David; Kuang, Rui
Detecting Population-differentiation Copy Number Variants in Human Population Tree by Sparse Group Selection Journal Article
In: IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 16, no. 2, pp. 538–549, 2017.
Abstract | Links | BibTeX | Tags: Sparse Group Learning
@article{Kuang2017,
title = {Detecting Population-differentiation Copy Number Variants in Human Population Tree by Sparse Group Selection},
author = {Zhang, Huanan and Roe, David and Kuang, Rui},
url = {http://ieeexplore.ieee.org/document/8168351/},
year = {2017},
date = {2017-12-08},
journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
volume = {16},
number = {2},
pages = {538--549},
abstract = {Copy-number variants (CNVs) account for a substantial proportion of human genetic variations. Understanding the CNV diversities across populations is a computational challenge because CNV patterns are often present in several related populations and only occur in a subgroup of individuals within each of the population. This paper introduces a tree-guided sparse group selection algorithm (treeSGS) to detect population-differentiation CNV markers of subgroups across populations organized by a phylogenetic tree of human populations. The treeSGS algorithm detects CNV markers of populations associated with nodes from all levels of the tree such that the evolutionary relations among the populations are incorporated for more accurate detection of population-differentiation CNVs. We applied treeSGS algorithm to study the 1179 samples from the 11 populations in Hapmap3 CNV data. The treeSGS algorithm accurately identifies CNV markers of each population and the collection of populations organized under the branches of the human population tree, validated by consistency among family trios and SNP characterizations of the CNV regions. Further comparison between the detected CNV markers and other population-differentiation CNVs reported in 1000 genome data and other recent studies also shows that treeSGS can significantly improve the current annotations of population-differentiation CNV markers. TreeSGS package is available at http://compbio.cs.umn.edu/treesgs.},
keywords = {Sparse Group Learning},
pubstate = {published},
tppubtype = {article}
}
Copy-number variants (CNVs) account for a substantial proportion of human genetic variations. Understanding the CNV diversities across populations is a computational challenge because CNV patterns are often present in several related populations and only occur in a subgroup of individuals within each of the population. This paper introduces a tree-guided sparse group selection algorithm (treeSGS) to detect population-differentiation CNV markers of subgroups across populations organized by a phylogenetic tree of human populations. The treeSGS algorithm detects CNV markers of populations associated with nodes from all levels of the tree such that the evolutionary relations among the populations are incorporated for more accurate detection of population-differentiation CNVs. We applied treeSGS algorithm to study the 1179 samples from the 11 populations in Hapmap3 CNV data. The treeSGS algorithm accurately identifies CNV markers of each population and the collection of populations organized under the branches of the human population tree, validated by consistency among family trios and SNP characterizations of the CNV regions. Further comparison between the detected CNV markers and other population-differentiation CNVs reported in 1000 genome data and other recent studies also shows that treeSGS can significantly improve the current annotations of population-differentiation CNV markers. TreeSGS package is available at http://compbio.cs.umn.edu/treesgs.
2012
Tian, Ze; Zhang, Huanan; Kuang, Rui
Sparse group selection on fused lasso components for identifying group-specific DNA copy number variations Best Paper Proceedings Article
In: 2012 IEEE 12th International Conference on Data Mining, pp. 665–674, IEEE, 2012, ISBN: 978-1-4673-4649-8.
Abstract | Links | BibTeX | Tags: Sparse Group Learning
@inproceedings{tian2012sparse,
title = {Sparse group selection on fused lasso components for identifying group-specific {DNA} copy number variations},
author = {Tian, Ze and Zhang, Huanan and Kuang, Rui},
url = {http://compbio.cs.umn.edu/wp-content/uploads/2017/10/SGS-FL.pdf},
doi = {10.1109/ICDM.2012.35},
isbn = {978-1-4673-4649-8},
year = {2012},
date = {2012-12-10},
urldate = {2012-12-10},
booktitle = {2012 IEEE 12th International Conference on Data Mining},
pages = {665--674},
publisher = {IEEE},
abstract = {Detecting DNA copy number variations (CNVs) from arrayCGH or genotyping-array data to correlate with cancer outcomes is crucial for understanding the molecular mechanisms underlying cancer. Previous methods either focus on detecting CNVs in each individual patient sample or common CNVs across all the patient samples. These methods ignore the discrepancies introduced by the heterogeneity in the patient samples, which implies that common CNVs might only be shared within some groups of samples instead of all samples. In this paper, we propose a latent feature model that couples sparse sample group selection with fused lasso on CNV components to identify group-specific CNVs. Assuming a given group structure on patient samples by clinical information, sparse group selection on fused lasso (SGS-FL) identifies the optimal latent CNV components, each of which is specific to the samples in one or several groups. The group selection for each CNV component is determined dynamically by an adaptive algorithm to achieve a desired sparsity. Simulation results show that SGS-FL can more accurately identify the latent CNV components when there is a reliable underlying group structure in the samples. In the experiments on arrayCGH breast cancer and bladder cancer datasets, SGS-FL detected CNV regions that are more relevant to cancer, and provided latent feature weights that can be used for better sample classification.},
keywords = {Sparse Group Learning},
pubstate = {published},
tppubtype = {inproceedings}
}
Detecting DNA copy number variations (CNVs) from arrayCGH or genotyping-array data to correlate with cancer outcomes is crucial for understanding the molecular mechanisms underlying cancer. Previous methods either focus on detecting CNVs in each individual patient sample or common CNVs across all the patient samples. These methods ignore the discrepancies introduced by the heterogeneity in the patient samples, which implies that common CNVs might only be shared within some groups of samples instead of all samples. In this paper, we propose a latent feature model that couples sparse sample group selection with fused lasso on CNV components to identify group-specific CNVs. Assuming a given group structure on patient samples by clinical information, sparse group selection on fused lasso (SGS-FL) identifies the optimal latent CNV components, each of which is specific to the samples in one or several groups. The group selection for each CNV component is determined dynamically by an adaptive algorithm to achieve a desired sparsity. Simulation results show that SGS-FL can more accurately identify the latent CNV components when there is a reliable underlying group structure in the samples. In the experiments on arrayCGH breast cancer and bladder cancer datasets, SGS-FL detected CNV regions that are more relevant to cancer, and provided latent feature weights that can be used for better sample classification.