2010
Tian, Ze; Kuang, Rui
Integrative classification and analysis of multiple arrayCGH datasets with probe alignment Journal Article
In: Bioinformatics, vol. 26, no. 18, pp. 2313–2320, 2010, ISSN: 1460-2059.
Abstract | Links | BibTeX | Tags: Kernel Method
@article{tian2010integrative,
  title      = {Integrative classification and analysis of multiple {arrayCGH} datasets with probe alignment},
  author     = {Tian, Ze and Kuang, Rui},
  url        = {http://bioinformatics.oxfordjournals.org/content/26/18/2313.short},
  doi        = {10.1093/bioinformatics/btq428},
  issn       = {1460-2059},
  year       = {2010},
  date       = {2010-07-18},
  journal    = {Bioinformatics},
  volume     = {26},
  number     = {18},
  pages      = {2313--2320},
  publisher  = {Oxford University Press},
  abstract   = {Motivation: Array comparative genomic hybridization (arrayCGH) is widely used to measure DNA copy numbers in cancer research. ArrayCGH data report log-ratio intensities of thousands of probes sampled along the chromosomes. Typically, the choices of the locations and the lengths of the probes vary in different experiments. This discrepancy in choosing probes poses a challenge in integrated classification or analysis across multiple arrayCGH datasets. We propose an alignment-based framework to integrate arrayCGH samples generated from different probe sets. The alignment framework seeks an optimal alignment between the probe series of one arrayCGH sample and the probe series of another sample, intended to find the maximum possible overlap of DNA copy number variations between the two measured chromosomes. An alignment kernel is introduced for integrative patient sample classification and a multiple alignment algorithm is also introduced for identifying common regions with copy number aberrations.
Results: The probe alignment kernel and the MPA algorithm were experimented to integrate three bladder cancer datasets as well as artificial datasets. In the experiments, by integrating arrayCGH samples from multiple datasets, the probe alignment kernel used with support vector machines significantly improved patient sample classification accuracy over other baseline kernels. The experiments also demonstrated that the multiple probe alignment (MPA) algorithm can find common DNA aberrations that cannot be identified with the standard interpolation method. Furthermore, the MPA algorithm also identified many known bladder cancer DNA aberrations containing four known bladder cancer genes, three of which cannot be detected by interpolation.},
  keywords   = {Kernel Method},
  pubstate   = {published},
  tppubtype  = {article}
}
Results: The probe alignment kernel and the MPA algorithm were experimented to integrate three bladder cancer datasets as well as artificial datasets. In the experiments, by integrating arrayCGH samples from multiple datasets, the probe alignment kernel used with support vector machines significantly improved patient sample classification accuracy over other baseline kernels. The experiments also demonstrated that the multiple probe alignment (MPA) algorithm can find common DNA aberrations that cannot be identified with the standard interpolation method. Furthermore, the MPA algorithm also identified many known bladder cancer DNA aberrations containing four known bladder cancer genes, three of which cannot be detected by interpolation.
2009
Min, Martin Renqiang; Kuang, Rui; Bonner, Anthony J; Zhang, Zhaolei
Learning Random-Walk Kernels for Protein Remote Homology Identification and Motif Discovery. Proceedings Article
In: SDM, pp. 133–144, SIAM 2009, ISBN: 978-0-89871-682-5.
Abstract | Links | BibTeX | Tags: Kernel Method, Protein Remote Homology Detection
@inproceedings{min2009learning,
  title        = {Learning Random-Walk Kernels for Protein Remote Homology Identification and Motif Discovery},
  author       = {Min, Martin Renqiang and Kuang, Rui and Bonner, Anthony J. and Zhang, Zhaolei},
  url          = {http://compbio.cs.umn.edu/wp-content/uploads/2017/10/12E97816119727952E12.pdf},
  doi          = {10.1137/1.9781611972795.12},
  isbn         = {978-0-89871-682-5},
  year         = {2009},
  date         = {2009-04-30},
  booktitle    = {Proceedings of the 2009 {SIAM} International Conference on Data Mining ({SDM})},
  pages        = {133--144},
  organization = {SIAM},
  abstract     = {Random-walk based algorithms are good choices for solving many classification problems with limited labeled data and a large amount of unlabeled data. However, it is difficult to choose the optimal number of random steps, and the results are very sensitive to the parameter chosen. In this paper, we will discuss how to better identify protein remote homology than any other algorithm using a learned random-walk kernel based on a positive linear combination of random-walk kernels with different random steps, which leads to a convex combination of kernels. The resulting kernel has much better prediction performance than the state-of-the-art profile kernel for protein remote homology identification. On the SCOP benchmark dataset, the overall mean ROC50 score on 54 protein families we obtained using the new kernel is above 0.90, which has almost perfect prediction performance on most of the 54 families and has significant improvement over the best published result; moreover, our approach based on learned random-walk kernels can effectively identify meaningful protein sequence motifs that are responsible for discriminating the memberships of protein sequences' remote homology in SCOP.},
  keywords     = {Kernel Method, Protein Remote Homology Detection},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
Ngo, Thanh; Kuang, Rui
Partial profile alignment kernels for protein classification Proceedings Article
In: 2009 IEEE International Workshop on Genomic Signal Processing and Statistics, pp. 1–4, IEEE 2009, ISBN: 978-1-4244-4761-9.
Abstract | Links | BibTeX | Tags: Kernel Method, Protein Remote Homology Detection
@inproceedings{ngo2009partial,
  title        = {Partial profile alignment kernels for protein classification},
  author       = {Ngo, Thanh and Kuang, Rui},
  url          = {http://compbio.cs.umn.edu/wp-content/uploads/2017/10/05174328.pdf},
  doi          = {10.1109/GENSIPS.2009.5174328},
  isbn         = {978-1-4244-4761-9},
  year         = {2009},
  date         = {2009-01-01},
  booktitle    = {2009 {IEEE} International Workshop on Genomic Signal Processing and Statistics},
  pages        = {1--4},
  organization = {IEEE},
  abstract     = {Remote homology detection and fold recognition are the central problems in protein classification. In real applications, kernel algorithms that are both accurate and efficient are required for classification of large databases. We explore a class of partial profile alignment kernels to be used with support vector machines (SVMs) for remote homology detection and fold recognition. While existing profile-based kernels use the whole profiles to determine the similarity between pairs of proteins, the partial profile alignment kernels are derived from part of the position specific scoring matrices (PSSMs) in the profiles for alignment. Specifically, at each position in the PSSM, only amino acids in the mutation neighborhood of the corresponding amino acid in the original protein sequence are considered for alignment to remove noise and improve computing efficiency. Our experiments on SCOP bench datasets show that the partial profile alignment kernels achieved overall better classification results for both fold recognition and remote homology detection than profile kernels and profile-alignment kernels. In addition, our algorithm using only a fraction of the profiles saves the cost of computing the kernels significantly, compared to the full-profile alignment methods.},
  keywords     = {Kernel Method, Protein Remote Homology Detection},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}