Chapter 7 MSigDB analysis

MSigDB is a collection of annotated gene sets. It includes 8 major collections:

  • H: hallmark gene sets
  • C1: positional gene sets
  • C2: curated gene sets
  • C3: motif gene sets
  • C4: computational gene sets
  • C5: GO gene sets
  • C6: oncogenic signatures
  • C7: immunologic signatures

Users can use the enricher and GSEA functions to analyze gene set collections downloaded from the Molecular Signatures Database (MSigDB). clusterProfiler provides a function, read.gmt, to parse a gmt file into a TERM2GENE data.frame that is ready for both the enricher and GSEA functions.

# Load the example geneList dataset from DOSE and keep the names of the
# entries whose absolute value exceeds 2.
data(geneList, package = "DOSE")
gene <- names(geneList[abs(geneList) > 2])

# Locate the c5.cc gmt file bundled with clusterProfiler and parse it
# into a TERM2GENE data.frame.
gmtfile <- system.file(
  "extdata", "c5.cc.v5.0.entrez.gmt",
  package = "clusterProfiler"
)
c5 <- read.gmt(gmtfile)

# Run enricher on the selected genes using c5 as the TERM2GENE mapping,
# then preview the first rows of the result.
egmt <- enricher(gene, TERM2GENE = c5)
head(egmt)
##                                                ID
## SPINDLE                                   SPINDLE
## MICROTUBULE_CYTOSKELETON MICROTUBULE_CYTOSKELETON
## CYTOSKELETAL_PART               CYTOSKELETAL_PART
## SPINDLE_MICROTUBULE           SPINDLE_MICROTUBULE
## MICROTUBULE                           MICROTUBULE
## CYTOSKELETON                         CYTOSKELETON
##                                       Description GeneRatio
## SPINDLE                                   SPINDLE     11/82
## MICROTUBULE_CYTOSKELETON MICROTUBULE_CYTOSKELETON     16/82
## CYTOSKELETAL_PART               CYTOSKELETAL_PART     15/82
## SPINDLE_MICROTUBULE           SPINDLE_MICROTUBULE      5/82
## MICROTUBULE                           MICROTUBULE      6/82
## CYTOSKELETON                         CYTOSKELETON     16/82
##                           BgRatio       pvalue     p.adjust
## SPINDLE                   39/5270 7.667674e-12 5.214018e-10
## MICROTUBULE_CYTOSKELETON 152/5270 8.449298e-10 2.872761e-08
## CYTOSKELETAL_PART        235/5270 2.414879e-06 5.237096e-05
## SPINDLE_MICROTUBULE       16/5270 3.080645e-06 5.237096e-05
## MICROTUBULE               32/5270 7.740446e-06 1.052701e-04
## CYTOSKELETON             367/5270 1.308357e-04 1.482805e-03
##                                qvalue
## SPINDLE                  4.197043e-10
## MICROTUBULE_CYTOSKELETON 2.312439e-08
## CYTOSKELETAL_PART        4.215619e-05
## SPINDLE_MICROTUBULE      4.215619e-05
## MICROTUBULE              8.473751e-05
## CYTOSKELETON             1.193589e-03
##                                                                                                  geneID
## SPINDLE                                           991/9493/9787/22974/983/332/3832/7272/9055/6790/24137
## MICROTUBULE_CYTOSKELETON 991/9493/9133/7153/9787/22974/4751/983/332/3832/7272/9055/6790/24137/4137/7802
## CYTOSKELETAL_PART             991/9493/7153/9787/22974/4751/983/332/3832/7272/9055/6790/24137/4137/7802
## SPINDLE_MICROTUBULE                                                             983/332/3832/9055/24137
## MICROTUBULE                                                                983/332/3832/9055/24137/4137
## CYTOSKELETON             991/9493/9133/7153/9787/22974/4751/983/332/3832/7272/9055/6790/24137/4137/7802
##                          Count
## SPINDLE                     11
## MICROTUBULE_CYTOSKELETON    16
## CYTOSKELETAL_PART           15
## SPINDLE_MICROTUBULE          5
## MICROTUBULE                  6
## CYTOSKELETON                16
# Run GSEA on the whole geneList against the c5 TERM2GENE mapping, with
# verbose output disabled, then preview the first rows of the result.
egmt2 <- GSEA(
  geneList,
  TERM2GENE = c5,
  verbose = FALSE
)
head(egmt2)
##                                                                    ID
## EXTRACELLULAR_REGION                             EXTRACELLULAR_REGION
## EXTRACELLULAR_REGION_PART                   EXTRACELLULAR_REGION_PART
## EXTRACELLULAR_MATRIX                             EXTRACELLULAR_MATRIX
## CELL_PROJECTION                                       CELL_PROJECTION
## PROTEINACEOUS_EXTRACELLULAR_MATRIX PROTEINACEOUS_EXTRACELLULAR_MATRIX
## EXTRACELLULAR_MATRIX_PART                   EXTRACELLULAR_MATRIX_PART
##                                                           Description
## EXTRACELLULAR_REGION                             EXTRACELLULAR_REGION
## EXTRACELLULAR_REGION_PART                   EXTRACELLULAR_REGION_PART
## EXTRACELLULAR_MATRIX                             EXTRACELLULAR_MATRIX
## CELL_PROJECTION                                       CELL_PROJECTION
## PROTEINACEOUS_EXTRACELLULAR_MATRIX PROTEINACEOUS_EXTRACELLULAR_MATRIX
## EXTRACELLULAR_MATRIX_PART                   EXTRACELLULAR_MATRIX_PART
##                                    setSize enrichmentScore
## EXTRACELLULAR_REGION                   401      -0.3860230
## EXTRACELLULAR_REGION_PART              310      -0.4101043
## EXTRACELLULAR_MATRIX                    95      -0.6229461
## CELL_PROJECTION                         87      -0.4729701
## PROTEINACEOUS_EXTRACELLULAR_MATRIX      93      -0.6355317
## EXTRACELLULAR_MATRIX_PART               54      -0.5908035
##                                          NES      pvalue
## EXTRACELLULAR_REGION               -1.694969 0.001240695
## EXTRACELLULAR_REGION_PART          -1.764536 0.001310616
## EXTRACELLULAR_MATRIX               -2.324445 0.001451379
## CELL_PROJECTION                    -1.741796 0.001455604
## PROTEINACEOUS_EXTRACELLULAR_MATRIX -2.360635 0.001466276
## EXTRACELLULAR_MATRIX_PART          -1.980073 0.001562500
##                                      p.adjust    qvalues
## EXTRACELLULAR_REGION               0.03159609 0.02468712
## EXTRACELLULAR_REGION_PART          0.03159609 0.02468712
## EXTRACELLULAR_MATRIX               0.03159609 0.02468712
## CELL_PROJECTION                    0.03159609 0.02468712
## PROTEINACEOUS_EXTRACELLULAR_MATRIX 0.03159609 0.02468712
## EXTRACELLULAR_MATRIX_PART          0.03159609 0.02468712
##                                    rank
## EXTRACELLULAR_REGION               1797
## EXTRACELLULAR_REGION_PART          1897
## EXTRACELLULAR_MATRIX               1473
## CELL_PROJECTION                    2280
## PROTEINACEOUS_EXTRACELLULAR_MATRIX 1473
## EXTRACELLULAR_MATRIX_PART          1794
##                                                      leading_edge
## EXTRACELLULAR_REGION               tags=29%, list=14%, signal=26%
## EXTRACELLULAR_REGION_PART          tags=32%, list=15%, signal=28%
## EXTRACELLULAR_MATRIX               tags=48%, list=12%, signal=43%
## CELL_PROJECTION                    tags=28%, list=18%, signal=23%
## PROTEINACEOUS_EXTRACELLULAR_MATRIX tags=49%, list=12%, signal=44%
## EXTRACELLULAR_MATRIX_PART          tags=59%, list=14%, signal=51%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             core_enrichment
## EXTRACELLULAR_REGION               57124/3910/51162/2878/2717/3373/4153/10406/1301/6750/7474/4925/7450/80781/1490/1306/3931/4314/6586/3964/10272/8425/8082/11005/4256/3483/7482/8910/23037/27122/7042/3912/4322/167/2817/9353/6037/1278/2934/5176/4060/283/30008/5549/5950/22795/727/10516/23452/1293/2247/1295/1012/6469/2192/1281/4023/54360/50509/11167/4319/1290/9365/3952/10879/11096/2202/4313/3625/2199/6444/6320/1294/3075/4653/5764/3991/3263/1462/1289/3908/4016/3909/4053/8817/7033/8292/5125/2162/5744/1842/5654/10631/2331/3730/2487/347/6863/5104/3913/27123/4982/1300/2200/9607/1287/7060/1489/9723/6424/1307/1311/4693/4148/1101/2922/10647
## EXTRACELLULAR_REGION_PART                                                                                                   4017/268/3567/57124/3910/2878/3373/4153/10406/1301/6750/7474/4925/80781/1490/1306/3931/4314/6586/3964/10272/8425/8082/4256/3483/7482/8910/27122/3912/4322/167/2817/1278/4060/283/30008/5549/5950/22795/727/10516/23452/1293/2247/1295/1012/6469/2192/1281/54360/50509/11167/4319/1290/9365/3952/10879/11096/2202/4313/2199/6444/1294/3075/4653/5764/3991/3263/1462/1289/3908/3909/4053/8817/8292/5125/5744/1842/5654/10631/2331/3730/347/6863/3913/27123/1300/2200/9607/1287/7060/9723/6424/1307/1311/4693/4148/1101/2922/10647
## EXTRACELLULAR_MATRIX                                                                                                                                                                                                                                                                                                                                                                                            80781/1490/1306/8425/8082/4256/8910/3912/1278/4060/283/30008/5549/22795/10516/1293/1295/2192/1281/50509/4319/1290/11096/2202/2199/6444/1294/1462/1289/3908/3909/4053/8292/1842/10631/2331/3730/3913/1300/2200/1287/7060/1307/1311/4148/1101
## CELL_PROJECTION                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              9732/4763/57147/64064/80184/322/54997/7248/5311/7042/323/4747/4744/1012/11346/2191/4741/4646/9576/114327/51466/27124/4137/7802
## PROTEINACEOUS_EXTRACELLULAR_MATRIX                                                                                                                                                                                                                                                                                                                                                                              80781/1490/1306/8425/8082/4256/8910/3912/1278/4060/283/30008/5549/22795/10516/1293/1295/2192/1281/50509/4319/1290/11096/2202/2199/6444/1294/1462/1289/3908/3909/4053/8292/1842/10631/2331/3730/3913/1300/2200/1287/7060/1307/1311/4148/1101
## EXTRACELLULAR_MATRIX_PART                                                                                                                                                                                                                                                                                                                                                                                                                                                               1298/3915/6443/55914/3910/1301/80781/1306/8082/8910/3912/1278/4060/283/30008/22795/1293/1295/1281/50509/1290/6444/1294/1289/3908/3909/8292/3913/1300/2200/1287/1307