Вернуться в репозиторий Tikhonova Polina. Homework 5.

In [ ]:
library(fossil)
library(iClusterPlus)
In [2]:
load('brca.dat')
In [3]:
data.full = data
d.1 = data.full$mRNA
d.2 = data.full$miRNA
d.3 = data.full$proteomics
elements = list(list(d.1, d.2, d.3), 
                list(d.1, d.2, NULL), 
                list(d.2, d.3, NULL), 
                list(d.1, d.3, NULL), 
                list(d.1, NULL, NULL), 
                list(d.2, NULL, NULL), 
                list(d.3, NULL, NULL))

Icluster

In [4]:
icluster = sapply(elements, function(x){
    fit.all=iClusterPlus(
                  dt1 = t(x[[1]]),
                  dt2 <- if (!is.null(x[[2]])) t(x[[2]]) else NULL,
                  dt3 <- if (!is.null(x[[3]])) t(x[[3]]) else NULL,
                  type=c("gaussian","gaussian","gaussian"),
                  lambda=c(0.04,0.61,0.90),K=2,maxiter=10)
    return(rand.index(fit.all$clusters, as.numeric(factor(Y, labels = c(1,2,3)))))
})

Kmeans

In [5]:
kmeans = sapply(elements, function(x){ 
    big.data = rbind(scale(x[[1]]), 
                     if (!is.null(x[[2]])) scale(x[[2]]) else NULL,
                     if (!is.null(x[[3]])) scale(x[[3]]) else NULL)
    big.data = scale(big.data)
    set.seed(105)
    kmeans.all.fit = kmeans(t(big.data),3)
    return(rand.index(kmeans.all.fit$cluster,as.numeric(factor(Y, labels = c(1,2,3)))))
})

Hclust

In [6]:
hclust = sapply(elements, function(x){
    big.data = rbind(scale(x[[1]]), 
                     if (!is.null(x[[2]])) scale(x[[2]]) else NULL,
                     if (!is.null(x[[3]])) scale(x[[3]]) else NULL)
    big.data = scale(big.data)
    d <- dist(t(big.data), method = "euclidean") # distance matrix
    fit <- hclust(d, method="average") 
    groups <- cutree(fit, k=3)
    return(rand.index(groups,as.numeric(factor(Y, labels = c(1,2,3)))))
})

Results

In [7]:
table = rbind(icluster, kmeans, hclust)
colnames(table) = c('All', 'mRNA&miRNA', 'miRNA&prot', 'mRNA&prot', 'mRNA', 'miRNA', 'prot')
.=c('','','')
best_result = sapply(list(icluster, kmeans, hclust), max)
best_type =  sapply(list(icluster, kmeans, hclust), function(x){colnames(table)[which(x==max(x))]})
cbind(table,., best_result, best_type)
cat('Замечание: Icluster и Kmeans с каждым запуском могут давать разные результаты.')
AllmRNA&miRNAmiRNA&protmRNA&protmRNAmiRNAprot.best_resultbest_type
icluster0.7463982 0.7449664 0.670962 0.7463982 0.7345861 0.6719463 0.6977181 0.7463982 All , mRNA&prot
kmeans0.975123 0.8585235 0.8910067 0.975123 0.8392841 0.7855928 0.8964653 0.975123 All , mRNA&prot
hclust0.8528859 0.3893512 0.830604 0.8575391 0.3893512 0.3812975 0.8353468 0.8575391 mRNA&prot
Замечание: Icluster и Kmeans с каждым запуском могут давать разные результаты.

Additional

Ради интереса запустила еще Mixomics

In [8]:
#install.packages("mixOmics")
library(mixOmics)
df = list(mRNA = t(d.1), miRNA = t(d.2), proteomics = t(d.3))
mixOm = mixOmics(df, Y=Y, ncomp=3)
plot(mixOm)
Warning message:
"package 'mixOmics' was built under R version 3.3.3"Loading required package: MASS
Warning message:
"package 'MASS' was built under R version 3.3.3"Loading required package: lattice
Warning message:
"package 'lattice' was built under R version 3.3.3"Loading required package: ggplot2
Warning message:
"package 'ggplot2' was built under R version 3.3.3"
Loaded mixOmics 6.3.0

Visit http://www.mixOmics.org for more details about our methods.
Any bug reports or comments? Notify us at mixomics at math.univ-toulouse.fr or https://bitbucket.org/klecao/package-mixomics/issues

Thank you for using mixOmics!

Attaching package: 'mixOmics'

The following object is masked from 'package:maps':

    map

a block Partial Least Squares - Discriminant Analysis is being performed (block.PLS-DA)