Introduction to R

Subsetting Lists

Suppose x is a list object in R. The following chunk shows how to extract information from a list object and how to access its elements.

# Build a two-element named list: an integer vector and a character vector.
x <- list(
  A = 1:6,
  B = c("a", "b", "c", "d")
)
# Single brackets keep the container: the result is a one-element list.
x[1]

## $A
## [1] 1 2 3 4 5 6

# Double brackets extract the element itself (here, the integer vector).
x[[1]]

## [1] 1 2 3 4 5 6

# `$` extracts an element by its literal name.
x$A

## [1] 1 2 3 4 5 6

# A list can mix types: here two numbers and a string.
x <- list(course = 689, credit = 3, sem = "FALL")
# A vector of positions inside single brackets returns a sub-list.
x[c(1, 3)]

## $course
## [1] 689
## 
## $sem
## [1] "FALL"

# `[[` accepts a name held in a variable; `x$name` would instead look for an
# element literally called "name".
name <- "credit"
x[[name]]

## [1] 3

# Chained extraction: `[[2]]` pulls out element B, then `[4]` takes its
# fourth value.
x <- list(
  A = 1:10,
  B = seq(from = 20, to = 25, by = 1)
)
x[[2]][4]

## [1] 23

Subsetting matrices

# 2 x 3 integer matrix, filled column by column.
x <- matrix(1:6, nrow = 2, ncol = 3)
# A single [row, column] index returns a plain length-one vector.
x[1, 2]

## [1] 3

# drop = FALSE keeps the result as a 1 x 1 matrix. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
x[1, 2, drop = FALSE]

##      [,1]
## [1,]    3

# Leaving an index empty selects the whole row or column; by default the
# result is simplified to a vector.
x[1, ]

## [1] 1 3 5

x[, 2]

## [1] 3 4

# Keep the first row as a 1 x 3 matrix instead of a vector.
x[1, , drop = FALSE]

##      [,1] [,2] [,3]
## [1,]    1    3    5

Removing NA values

# Numeric vector with two missing entries.
x <- c(1, 2, NA, 4, NA, 5)
# Logical mask marking the missing positions; negate it to keep the rest.
bad <- is.na(x)
x[!bad]

## [1] 1 2 4 5

# complete.cases() flags the positions where neither x nor y is NA.
y <- c("a", "b", NA, "d", NA, "f")
select <- complete.cases(x, y)
x[select]

## [1] 1 2 4 5

y[select]

## [1] "a" "b" "d" "f"

# 10 x 10 matrix of standard-normal draws.
data <- matrix(rnorm(100, mean = 0, sd = 1), nrow = 10)
# Plant missing values in rows 4 and 6.
data[4, 1] <- NA
data[6, 2] <- NA
data[4, 6] <- NA
# Keep only the rows with no NA, then show the first six of them.
keep <- complete.cases(data)
data[keep, ][1:6, ]

##            [,1]       [,2]       [,3]       [,4]       [,5]        [,6]
## [1,] -0.6271490  1.2794757 -0.2335086  0.4584832 -0.2036284  0.62244857
## [2,]  1.0832882 -0.3740906 -2.2665337 -1.0861524  0.5265468 -0.02471441
## [3,] -0.0717052 -1.4466038  0.1464550 -0.4964407 -0.1839417  1.86649438
## [4,] -0.1008556 -0.4059537 -1.6729458  1.4533749  2.2994596  1.29240559
## [5,] -0.7173906 -1.1021700  0.6713288 -0.5083368 -0.4972138 -0.07170019
## [6,] -0.5604094 -1.1133294  0.3064856  0.8752643  1.4867615 -0.82845830
##             [,7]        [,8]       [,9]       [,10]
## [1,] -0.40881749  0.29435523  0.6394091  1.63816964
## [2,] -0.13988885 -0.73297479 -1.3347625  0.48934312
## [3,] -1.07999952  0.05150205  0.6869289 -0.42631770
## [4,]  0.75537387 -0.32165687 -0.5780814  0.04231296
## [5,]  0.02394506  1.38402423  0.4705631 -0.95026361
## [6,] -0.02787642  0.42128630 -1.0245564 -2.79929037

matrix multiplication

# Two 4 x 5 matrices of Poisson counts.
x <- matrix(rpois(20, lambda = 1.5), nrow = 4)
y <- matrix(rpois(20, lambda = 2), nrow = 4)
# %*% is the matrix product; transposing y makes the shapes conform
# (4 x 5 times 5 x 4), giving a 4 x 4 result.
x %*% t(y)

##      [,1] [,2] [,3] [,4]
## [1,]   16   21    8   19
## [2,]   14   19   14   23
## [3,]   16   21   14   22
## [4,]   11   15    5   11

control structures

# Template only: replace <condition> with an expression that yields a single
# TRUE or FALSE before running this.
if(<condition>){
   ## do something
}else {
    ## do something else
}

Example

# Flag which entries of x are divisible by 3 (1 = divisible, 0 = not).
x <- c(1:3, 13:19)
# Preallocate the result so the loop fills it in place.
count <- rep(0, length(x))
# seq_along(x) is safe for an empty x, where 1:length(x) would yield c(1, 0).
for (i in seq_along(x)) {
  if (x[i] %% 3 == 0) {
    count[i] <- 1
  } else {
    count[i] <- 0
  }
}
count

##  [1] 0 0 1 0 0 1 0 0 1 0

Nested loops

# Visit every cell of the matrix row by row, printing each value.
x <- matrix(1:6, 2, 3)
# seq_len() yields an empty sequence for a zero-extent matrix, whereas
# 1:nrow(x) would count down from 1 to 0.
for (i in seq_len(nrow(x))) {
  for (j in seq_len(ncol(x))) {
    print(x[i, j])
  }
}

## [1] 1
## [1] 3
## [1] 5
## [1] 2
## [1] 4
## [1] 6

# Print the integers 0 through 9; the loop stops once count reaches 10.
count <- 0
while (count < 10) {
  print(count)
  count <- count + 1
}

## [1] 0
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9

# Random walk starting at 10: each pass prints the current value, then moves
# up or down by one depending on a fair coin flip. The loop ends as soon as
# count leaves the range 8..15.
count <- 10
while (count >= 8 && count <= 15) {
  print(count)
  y <- rbinom(1, size = 1, prob = 0.5)
  count <- count + (if (y == 1) 1 else -1)
}

## [1] 10
## [1] 11
## [1] 12
## [1] 11
## [1] 12
## [1] 11
## [1] 10
## [1] 11
## [1] 10
## [1] 9
## [1] 10
## [1] 11
## [1] 10
## [1] 9
## [1] 8
## [1] 9
## [1] 8

Application of lapply

# Apply mean() to each element of the list; lapply() always returns a list,
# here carrying the same names as x.
x <- list(a = 1:6, b = rnorm(20))
lapply(x, FUN = mean)

## $a
## [1] 3.5
## 
## $b
## [1] 0.3309427

# lapply() also works on a plain vector: each value of n is passed as the
# first argument of rnorm(), so the i-th result holds n[i] random draws.
n <- 20:25
lapply(n, rnorm)

## [[1]]
##  [1]  0.48854714 -1.26325141 -0.29414303  0.27281617 -1.42467594
##  [6] -1.41259012 -1.23597651  1.15876777  0.19443662  0.05161971
## [11] -0.05172900  0.10142570 -1.41401398  0.24866032  0.27696152
## [16]  0.17280489 -0.24053153 -0.16559129 -0.70341867  1.34216153
## 
## [[2]]
##  [1] -0.5109436 -0.5043873 -0.1904385 -0.6657738  0.6874222 -0.2342082
##  [7]  1.2897946 -1.0574992  2.2516772 -0.6805431 -1.1521277  0.5283933
## [13] -0.1562630  0.8580294 -0.4970907 -0.4663930  0.1992250  0.9303913
## [19] -0.8088025 -1.6718232 -1.6894229
## 
## [[3]]
##  [1] -1.4633121 -0.3683112 -0.5123979 -1.5042865 -0.7626365  0.1916466
##  [7]  1.3997910  0.2917191 -0.3723488  0.7039750  1.7656462 -0.1924491
## [13]  0.6952221  0.5774937  0.8043867  0.1397612 -0.1675083 -1.1934418
## [19]  0.9435622  0.0465076 -1.0160125 -2.2722410
## 
## [[4]]
##  [1] -0.11355655 -1.34656848  1.18613838  0.80816823  1.48430545
##  [6] -0.12628790 -1.02506334 -0.93503002  1.67641350 -0.76981173
## [11]  0.94198329 -0.39369622 -0.31050915  2.00319961  1.37329376
## [16] -1.24256141  0.32519004 -0.93915578  0.71280314  1.38358481
## [21]  0.86829649  0.05292082 -2.37386582
## 
## [[5]]
##  [1]  1.45937594  0.72422439  0.43744336 -0.49635735  1.33251070
##  [6] -1.51985626  0.95858907 -0.52526699  0.67981929 -0.29853150
## [11]  1.08021909 -1.21664542  0.03802331  0.30913841 -0.10060330
## [16]  1.44724505  0.28063096  1.21657339  0.21004710  1.23603779
## [21]  1.39252155 -1.65432004  0.49228912 -0.64943850
## 
## [[6]]
##  [1]  0.18097612  1.36738417 -0.66978994 -1.34089326 -1.02262208
##  [6] -0.05907545  0.74275686  0.04471133 -0.99234373  0.79910909
## [11]  1.64772092 -0.42724391  1.55052761  0.61758758  0.80334260
## [16] -0.21677186  1.79323008 -1.34608079 -0.36288422  0.03447574
## [21] -1.51644172 -1.14826493  0.00732521  0.91001525  0.35684399

Application of apply

# 5 x 2 matrix of standard-normal draws.
x <- matrix(rnorm(10), nrow = 5)
# MARGIN = 2 applies the function to each column.
apply(x, MARGIN = 2, FUN = mean)

## [1]  0.13151458 -0.03733971

# MARGIN = 1 applies the function to each row.
apply(x, MARGIN = 1, FUN = mean)

## [1]  0.8111073  0.8445551  0.2546650 -1.0039340 -0.6709563

# See also the built-in shortcuts rowSums(), rowMeans(), colSums(), colMeans().

use of strsplit

# Load the data; TRUE is spelled out because `T` can be reassigned.
data <- read.csv("TobaccoVars.csv", sep = ",", header = TRUE)
# Use the first column as row names, then drop it from the table.
# drop = FALSE keeps a data frame even if only one column remains.
rownames(data) <- data[, 1]
data <- data[, -1, drop = FALSE]
N <- colnames(data)
# Split column names 2 to 33 at each literal dot ("[.]" matches "." itself).
n <- strsplit(N[2:33], "[.]")
n[1:10]

## [[1]]
## [1] "hsa" "let" "7b" 
## 
## [[2]]
## [1] "hsa" "let" "7d" 
## 
## [[3]]
## [1] "hsa" "mir" "10a"
## 
## [[4]]
## [1] "hsa"  "mir"  "1266"
## 
## [[5]]
## [1] "hsa" "mir" "128" "1"  
## 
## [[6]]
## [1] "hsa"  "mir"  "1287"
## 
## [[7]]
## [1] "hsa"  "mir"  "1293"
## 
## [[8]]
## [1] "hsa"  "mir"  "1304"
## 
## [[9]]
## [1] "hsa"  "mir"  "135b"
## 
## [[10]]
## [1] "hsa"  "mir"  "181d"

Introduction to R — part II

Debdeep Pati (Acknowledgement to Moumita Karmakar and Roger D. Peng)

Subsetting Lists

Subsetting matrices

Removing NA values

matrix multiplication

control structures

Application of lapply

Application of apply

use of strsplit