@comment{Journal-name macros: every journal cited by more than one complete entry below is spelled exactly once here.}

@string{genomebiol = {Genome Biology}}
@string{nar        = {Nucleic Acids Research}}
@string{natgenet   = {Nature Genetics}}
@string{pnas       = {Proceedings of the National Academy of Sciences}}

@comment{Conventions used below: citation keys are the identifiers of the original export and must not change; month uses the standard three-letter macros; page ranges use a double hyphen; submission status lives in note, never in year or month. The internal-note field is ignored by bibliography styles and only preserves markers found in the original export.}

@unpublished{596641,
  author        = {Jing Zhang and Donghoon Lee and Vineet Dhiman and Peng Jiang and Liu, Xiaole Shirley and Kevin White and Mark Gerstein and {ENCODE consortium}},
  title         = {An integrative {ENCODE} resource for cancer: interpreting non-coding mutations and gene regulation},
  note          = {Submitted to Nature; in review},
  internal-note = {Authors flagged with \# in the original export (presumably equal contribution -- confirm): Jing Zhang, Donghoon Lee, Vineet Dhiman, Peng Jiang},
}

@comment{Entry 613954 is an empty stub in the original export: it has no author or title, so no style can render it. Complete it or remove it once the intended work is identified.}

@booklet{613954,
  note = {Submitted},
}

@article{633904,
  author   = {Xihao Hu and Zhang, Jian and Wang, Jin and Jingxin Fu and Li, Taiwen and Zheng, Xiaoqi and Wang, Binbin and Shengqing Gu and Jiang, Peng and Fan, Jingyu and Ying, Xiaomin and Zhang, Jing and Carroll, Michael C and Wucherpfennig, Kai W and Hacohen, Nir and Fan Zhang and Zhang, Peng and Liu, Jun S and Li, Bo and X. Shirley Liu},
  title    = {Landscape of {B} cell immunity and related immune evasion in human cancers},
  journal  = natgenet,
  volume   = {51},
  number   = {3},
  pages    = {560--567},
  year     = {2019},
  month    = mar,
  issn     = {1546-1718},
  doi      = {10.1038/s41588-018-0339-x},
  abstract = {Tumor-infiltrating B cells are an important component in the microenvironment but have unclear anti-tumor effects. We enhanced our previous computational algorithm TRUST to extract the B cell immunoglobulin hypervariable regions from bulk tumor RNA-sequencing data. TRUST assembled more than 30 million complementarity-determining region 3 sequences of the B cell heavy chain (IgH) from The Cancer Genome Atlas. Widespread B cell clonal expansions and immunoglobulin subclass switch events were observed in diverse human cancers. Prevalent somatic copy number alterations in the MICA and MICB genes related to antibody-dependent cell-mediated cytotoxicity were identified in tumors with elevated B cell activity. The IgG3-1 subclass switch interacts with B cell-receptor affinity maturation and defects in the antibody-dependent cell-mediated cytotoxicity pathway. Comprehensive pancancer analyses of tumor-infiltrating B cell-receptor repertoires identified novel tumor immune evasion mechanisms through genetic alterations. The IgH sequences identified here are potentially useful resources for future development of immunotherapies.},
}

@article{620084,
  author        = {Peng Jiang and Shengqing Gu and Deng Pan and Jingxin Fu and Sahu, Avinash and Xihao Hu and Ziyi Li and Traugh, Nicole and Xia Bu and Li, Bo and Jun Liu and Freeman, Gordon J and Brown, Myles A and Wucherpfennig, Kai W and X. Shirley Liu},
  title         = {Signatures of {T} cell dysfunction and exclusion predict cancer immunotherapy response},
  journal       = {Nature Medicine},
  volume        = {24},
  number        = {20},
  pages         = {1550--1558},
  year          = {2018},
  month         = aug,
  issn          = {1546-170X},
  doi           = {10.1038/s41591-018-0136-1},
  abstract      = {Cancer treatment by immune checkpoint blockade (ICB) can bring long-lasting clinical benefits, but only a fraction of patients respond to treatment. To predict ICB response, we developed TIDE, a computational method to model two primary mechanisms of tumor immune evasion: the induction of T cell dysfunction in tumors with high infiltration of cytotoxic T lymphocytes (CTL) and the prevention of T cell infiltration in tumors with low CTL level. We identified signatures of T cell dysfunction from large tumor cohorts by testing how the expression of each gene in tumors interacts with the CTL infiltration level to influence patient survival. We also modeled factors that exclude T cell infiltration into tumors using expression signatures from immunosuppressive cells. Using this framework and pre-treatment RNA-Seq or NanoString tumor expression profiles, TIDE predicted the outcome of melanoma patients treated with first-line anti-PD1 or anti-CTLA4 more accurately than other biomarkers such as PD-L1 level and mutation load. TIDE also revealed new candidate ICB resistance regulators, such as SERPINB9, demonstrating utility for immunotherapy research.},
  internal-note = {Authors flagged with \# in the original export (presumably equal contribution -- confirm): Peng Jiang, Shengqing Gu, Deng Pan},
}

@article{613952,
  author   = {Jiang, Peng and William R. Sellers and X. Shirley Liu},
  title    = {Big Data Approaches for Modeling Response and Resistance to Cancer Drugs},
  journal  = {Annual Review of Biomedical Data Science},
  volume   = {1},
  number   = {1},
  pages    = {1--27},
  year     = {2018},
  doi      = {10.1146/annurev-biodatasci-080917-013350},
  url      = {https://www.annualreviews.org/doi/abs/10.1146/annurev-biodatasci-080917-013350},
  keywords = {big data, combination therapy, Drug Resistance, Immunotherapy, Precision Medicine, response biomarker, toxicity},
  abstract = {Despite significant progress in cancer research, current standard-of-care drugs fail to cure many types of cancers. Hence, there is an urgent need to identify better predictive biomarkers and treatment regimes. Conventionally, insights from hypothesis-driven studies are the primary force for cancer biology and therapeutic discoveries. Recently, the rapid growth of big data resources, catalyzed by breakthroughs in high-throughput technologies, has resulted in a paradigm shift in cancer therapeutic research. The combination of computational methods and genomics data has led to several successful clinical applications. In this review, we focus on recent advances in data-driven methods to model anticancer drug efficacy, and we present the challenges and opportunities for data science in cancer therapeutic research.},
}

@article{613953,
  author   = {Xiao, Tengfei and Li, Wei and Xiaoqing Wang and Han Xu and Jixin Yang and Wu, Qiu and Huang, Ying and Geradts, Joseph and Jiang, Peng and Teng Fei and Chi, David and Zang, Chongzhi and Qi Liao and Rennhack, Jonathan and Eran Andrechek and Li, Nanlin and Detre, Simone and Dowsett, Mitchell and Rinath M. Jeselsohn and X. Shirley Liu and Brown, Myles},
  title    = {Estrogen-regulated feedback loop limits the efficacy of estrogen receptor{\textendash}targeted breast cancer therapy},
  journal  = pnas,
  year     = {2018},
  doi      = {10.1073/pnas.1722617115},
  url      = {http://www.pnas.org/content/pnas/early/2018/07/06/1722617115.full.pdf},
  abstract = {Estrogen receptor-positive (ER+) breast cancer is treated with endocrine therapies, although therapeutic resistance almost invariably develops in advanced disease. Using genome-wide CRISPR screens, we identified genes whose loss confers endocrine resistance, as well as synthetic lethal vulnerabilities to overcome such resistance. These findings reveal an estrogen-induced negative feedback loop that constrains the growth of ER+ tumors, thereby limiting the efficacy of therapies that inhibit ER, and suggest a previously unappreciated therapeutic route to overcoming endocrine resistance. Endocrine therapy resistance invariably develops in advanced estrogen receptor-positive (ER+) breast cancer, but the underlying mechanisms are largely unknown. We have identified C-terminal SRC kinase (CSK) as a critical node in a previously unappreciated negative feedback loop that limits the efficacy of current ER-targeted therapies. Estrogen directly drives CSK expression in ER+ breast cancer. At low CSK levels, as is the case in patients with ER+ breast cancer resistant to endocrine therapy and with the poorest outcomes, the p21 protein-activated kinase 2 (PAK2) becomes activated and drives estrogen-independent growth. PAK2 overexpression is also associated with endocrine therapy resistance and worse clinical outcome, and the combination of a PAK2 inhibitor with an ER antagonist synergistically suppressed breast tumor growth. Clinical approaches to endocrine therapy-resistant breast cancer must overcome the loss of this estrogen-induced negative feedback loop that normally constrains the growth of ER+ tumors.},
}

@article{doi:10.1093/bioinformatics/bty450,
  author  = {Chen, Chen-Hao and Xiao, Tengfei and Han Xu and Jiang, Peng and Meyer, Clifford A and Li, Wei and Brown, Myles and X. Shirley Liu},
  title   = {Improved design and analysis of {CRISPR} knockout screens},
  journal = {Bioinformatics},
  pages   = {bty450},
  year    = {2018},
  doi     = {10.1093/bioinformatics/bty450},
}

@article{607060,
  author   = {Jiang, Peng and Winston Lee and Xujuan Li and Carl Johnson and Liu, Jun S and Brown, Myles and Jon Christopher Aster and X. Shirley Liu},
  title    = {Genome-Scale Signatures of Gene Interaction from Compound Screens Predict Clinical Efficacy of Targeted Cancer Therapies},
  journal  = {Cell Systems},
  volume   = {6},
  number   = {3},
  pages    = {343--354},
  year     = {2018},
  month    = feb,
  issn     = {2405-4712},
  doi      = {10.1016/j.cels.2018.01.009},
  url      = {http://www.cell.com/cell-systems/fulltext/S2405-4712(18)30009-7},
  abstract = {Identifying reliable drug response biomarkers is a significant challenge in cancer research. We present computational analysis of resistance (CARE), a computational method focused on targeted therapies, to infer genome-wide transcriptomic signatures of drug efficacy from cell line compound screens. CARE outputs genome-scale scores to measure how the drug target gene interacts with other genes to affect the inhibitor efficacy in the compound screens. Such statistical interactions between drug targets and other genes were not considered in previous studies but are critical in identifying predictive biomarkers. When evaluated using transcriptome data from clinical studies, CARE can predict the therapy outcome better than signatures from other computational methods and genomics experiments. Moreover, the CARE signatures for the PLX4720 BRAF inhibitor are associated with an anti-programmed death 1 clinical response, suggesting a common efficacy signature between a targeted therapy and immunotherapy. When searching for genes related to lapatinib resistance, CARE identified PRKD3 as the top candidate. PRKD3 inhibition, by both small interfering RNA and compounds, significantly sensitized breast cancer cells to lapatinib. Thus, CARE should enable large-scale inference of response biomarkers and drug combinations for targeted therapies using compound screen data.},
}

@article{606647,
  author        = {Deng Pan and Aya Kobayashi and Peng Jiang and Lucas Ferrari de Andrade and Rong En Tay and Luoma, Adrienne M and Tsoucas, Daphne and Xintao Qiu and Klothilda Lim and Prakash Rao and Long, Henry W and Yuan, Guo-Cheng and John Doench and Brown, Myles and X. Shirley Liu and Wucherpfennig, Kai W},
  title         = {A major chromatin regulator determines resistance of tumor cells to {T} cell-mediated killing},
  journal       = {Science},
  volume        = {359},
  number        = {6377},
  pages         = {770--775},
  year          = {2018},
  month         = feb,
  issn          = {1095-9203},
  doi           = {10.1126/science.aao1710},
  abstract      = {Many human cancers are resistant to immunotherapy, for reasons that are poorly understood. We used a genome-scale CRISPR-Cas9 screen to identify mechanisms of tumor cell resistance to killing by cytotoxic T cells, the central effectors of antitumor immunity. Inactivation of >100 genes---including Pbrm1, Arid2, and Brd7, which encode components of the PBAF form of the SWI/SNF chromatin remodeling complex---sensitized mouse B16F10 melanoma cells to killing by T cells. Loss of PBAF function increased tumor cell sensitivity to interferon-$\gamma$, resulting in enhanced secretion of chemokines that recruit effector T cells. Treatment-resistant tumors became responsive to immunotherapy when Pbrm1 was inactivated. In many human cancers, expression of PBRM1 and ARID2 inversely correlated with expression of T cell cytotoxicity genes, and Pbrm1-deficient murine melanomas were more strongly infiltrated by cytotoxic T cells.},
  internal-note = {Authors flagged with \# in the original export (presumably equal contribution -- confirm): Deng Pan, Aya Kobayashi, Peng Jiang. The gene names in the abstract were missing from the export and were restored from the published abstract; verify against the DOI.},
}

@article{596476,
  author   = {Mei, Shenglin and Meyer, Clifford A and Zheng, Rongbin and Qin, Qian and Wu, Qiu and Jiang, Peng and Li, Bo and Shi, Xiaohui and Wang, Binbin and Fan, Jingyu and Shih, Celina and Brown, Myles and Zang, Chongzhi and X. Shirley Liu},
  title    = {{Cistrome Cancer}: A Web Resource for Integrative Gene Regulation Modeling in Cancer},
  journal  = {Cancer Research},
  volume   = {77},
  number   = {21},
  pages    = {e19--e22},
  year     = {2017},
  month    = nov,
  issn     = {1538-7445},
  doi      = {10.1158/0008-5472.CAN-17-0327},
  keywords = {Chromatin, Computational Biology, Epigenomics, Gene Expression Regulation, Neoplastic, Humans, Internet, Neoplasms, Sequence Analysis, DNA, Transcription Factors},
  abstract = {Cancer results from a breakdown of normal gene expression control, so the study of gene regulation is critical to cancer research. To gain insight into the transcriptional and epigenetic factors regulating abnormal gene expression patterns in cancers, we developed the Cistrome Cancer web resource (http://cistrome.org/CistromeCancer/). We conducted the systematic integration and modeling of over 10,000 tumor molecular profiles from The Cancer Genome Atlas (TCGA) with over 23,000 ChIP-seq and chromatin accessibility profiles from our Cistrome collection. The results include reconstruction of functional enhancer profiles, ``super-enhancer'' target genes, as well as predictions of active transcription factors and their target genes for each TCGA cancer type. Cistrome Cancer reveals novel insights from integrative analyses combining chromatin profiles with tumor molecular profiles and will be a useful resource to the cancer gene regulation community. Cancer Res; 77(21); e19-22. {\textcopyright}2017 AACR.},
}

@article{596471,
  author   = {Li, Mulin Jun and Zhang, Jian and Liang, Qian and Xuan, Chenghao and Wu, Jiexing and Jiang, Peng and Li, Wei and Zhu, Yun and Wang, Panwen and Fernandez, Daniel and Shen, Yujun and Chen, Yiwen and Kocher, Jean-Pierre A and Yu, Ying and Sham, Pak Chung and Wang, Junwen and Liu, Jun S and X. Shirley Liu},
  title    = {Exploring genetic associations with {ceRNA} regulation in the human genome},
  journal  = nar,
  volume   = {45},
  number   = {10},
  pages    = {5653--5665},
  year     = {2017},
  month    = jun,
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkx331},
  keywords = {3{\textquoteright} Untranslated Regions, Base Pairing, Binding Sites, Chromosome Mapping, Gene Expression Regulation, Gene Regulatory Networks, Genome, Human, Genome-Wide Association Study, Humans, MicroRNAs, Polymorphism, Single Nucleotide, Quantitative Trait Loci, RNA, Untranslated},
  abstract = {Competing endogenous RNAs (ceRNAs) are RNA molecules that sequester shared microRNAs (miRNAs) thereby affecting the expression of other targets of the miRNAs. Whether genetic variants in ceRNA can affect its biological function and disease development is still an open question. Here we identified a large number of genetic variants that are associated with ceRNA{\textquoteright}s function using Geuvadis RNA-seq data for 462 individuals from the 1000 Genomes Project. We call these loci competing endogenous RNA expression quantitative trait loci or {\textquoteright}cerQTL{\textquoteright}, and found that a large number of them were unexplored in conventional eQTL mapping. We identified many cerQTLs that have undergone recent positive selection in different human populations, and showed that single nucleotide polymorphisms in gene 3{\textquoteright}UTRs at the miRNA seed binding regions can simultaneously regulate gene expression changes in both cis and trans by the ceRNA mechanism. We also discovered that cerQTLs are significantly enriched in traits/diseases associated variants reported from genome-wide association studies in the miRNA binding sites, suggesting that disease susceptibilities could be attributed to ceRNA regulation. Further in vitro functional experiments demonstrated that a cerQTL rs11540855 can regulate ceRNA function. These results provide a comprehensive catalog of functional non-coding regulatory variants that may be responsible for ceRNA crosstalk at the post-transcriptional level.},
}

@article{596401,
  author   = {Li, Bo and Severson, Eric and Pignon, Jean-Christophe and Zhao, Haoquan and Li, Taiwen and Novak, Jesse and Jiang, Peng and Shen, Hui and Aster, Jon C and Rodig, Scott and Signoretti, Sabina and Liu, Jun S and X. Shirley Liu},
  title    = {Comprehensive analyses of tumor immunity: implications for cancer immunotherapy},
  journal  = genomebiol,
  volume   = {17},
  number   = {1},
  pages    = {174},
  year     = {2016},
  month    = aug,
  issn     = {1474-760X},
  doi      = {10.1186/s13059-016-1028-7},
  keywords = {Biomarkers, Tumor, Databases, Genetic, Disease Susceptibility, Gene Expression Regulation, Neoplastic, Genetic Variation, Humans, Immune System, Immunity, Immunotherapy, Lymphocytes, Tumor-Infiltrating, Models, Biological, Molecular Targeted Therapy, Neoplasms, Signal Transduction},
  abstract = {BACKGROUND: Understanding the interactions between tumor and the host immune system is critical to finding prognostic biomarkers, reducing drug resistance, and developing new therapies. Novel computational methods are needed to estimate tumor-infiltrating immune cells and understand tumor-immune interactions in cancers. RESULTS: We analyze tumor-infiltrating immune cells in over 10,000 RNA-seq samples across 23 cancer types from The Cancer Genome Atlas (TCGA). Our computationally inferred immune infiltrates associate much more strongly with patient clinical features, viral infection status, and cancer genetic alterations than other computational approaches. Analysis of cancer/testis antigen expression and CD8 T-cell abundance suggests that MAGEA3 is a potential immune target in melanoma, but not in non-small cell lung cancer, and implicates SPAG5 as an alternative cancer vaccine target in multiple cancers. We find that melanomas expressing high levels of CTLA4 separate into two distinct groups with respect to CD8 T-cell infiltration, which might influence clinical responses to anti-CTLA4 agents. We observe similar dichotomy of TIM3 expression with respect to CD8 T cells in kidney cancer and validate it experimentally. The abundance of immune infiltration, together with our downstream analyses and findings, are accessible through TIMER, a public resource at http://cistrome.org/TIMER . CONCLUSIONS: We develop a computational approach to study tumor-infiltrating immune cells and their interactions with cancer cells. Our resource of immune-infiltrate levels, clinical associations, as well as predicted therapeutic markers may inform effective cancer vaccine and checkpoint blockade therapies.},
}

@article{596436,
  author   = {Jiang, Peng and X. Shirley Liu},
  title    = {Big data mining yields novel insights on cancer},
  journal  = natgenet,
  volume   = {47},
  number   = {2},
  pages    = {103--104},
  year     = {2015},
  month    = feb,
  issn     = {1546-1718},
  doi      = {10.1038/ng.3205},
  keywords = {DNA Copy Number Variations, Gene Dosage, Gene Expression Regulation, Neoplastic, Genomics, Humans, Neoplasms, Transcriptome},
  abstract = {Recent years have seen the rapid growth of large-scale biological data, but the effective mining and modeling of {\textquoteright}big data{\textquoteright} for new biological discoveries remains a significant challenge. A new study reanalyzes expression profiles from the Gene Expression Omnibus to make novel discoveries about genes involved in DNA damage repair and genome instability in cancer.},
}

@article{596426,
  author   = {Jiang, Peng and Freedman, Matthew L and Liu, Jun S and Liu, Xiaole Shirley},
  title    = {Inference of transcriptional regulation in cancers},
  journal  = pnas,
  volume   = {112},
  number   = {25},
  pages    = {7731--7736},
  year     = {2015},
  month    = jun,
  issn     = {1091-6490},
  doi      = {10.1073/pnas.1424272112},
  keywords = {Gene Expression Regulation, Neoplastic, Humans, Neoplasms, Transcription, Genetic},
  abstract = {Despite the rapid accumulation of tumor-profiling data and transcription factor (TF) ChIP-seq profiles, efforts integrating TF binding with the tumor-profiling data to understand how TFs regulate tumor gene expression are still limited. To systematically search for cancer-associated TFs, we comprehensively integrated 686 ENCODE ChIP-seq profiles representing 150 TFs with 7484 TCGA tumor data in 18 cancer types. For efficient and accurate inference on gene regulatory rules across a large number and variety of datasets, we developed an algorithm, RABIT (regression analysis with background integration). In each tumor sample, RABIT tests whether the TF target genes from ChIP-seq show strong differential regulation after controlling for background effect from copy number alteration and DNA methylation. When multiple ChIP-seq profiles are available for a TF, RABIT prioritizes the most relevant ChIP-seq profile in each tumor. In each cancer type, RABIT further tests whether the TF expression and somatic mutation variations are correlated with differential expression patterns of its target genes across tumors. Our predicted TF impact on tumor gene expression is highly consistent with the knowledge from cancer-related gene databases and reveals many previously unidentified aspects of transcriptional regulation in tumor progression. We also applied RABIT on RNA-binding protein motifs and found that some alternative splicing factors could affect tumor-specific gene expression by binding to target gene 3{\textquoteright}UTR regions. Thus, RABIT (rabit.dfci.harvard.edu) is a general platform for predicting the oncogenic role of gene expression regulators.},
}

@article{596461,
  author   = {Jiang, Peng and Wang, Hongfang and Li, Wei and Zang, Chongzhi and Li, Bo and Wong, Yinling J and Meyer, Cliff and Liu, Jun S and Aster, Jon C and X. Shirley Liu},
  title    = {Network analysis of gene essentiality in functional genomics experiments},
  journal  = genomebiol,
  volume   = {16},
  pages    = {239},
  year     = {2015},
  month    = oct,
  issn     = {1474-760X},
  doi      = {10.1186/s13059-015-0808-9},
  keywords = {Cell Line, CRISPR-Cas Systems, Gene Expression Profiling, Gene Regulatory Networks, Genes, Essential, Genomics, Humans, Neoplasms, Software, Survival Analysis},
  abstract = {Many genomic techniques have been developed to study gene essentiality genome-wide, such as CRISPR and shRNA screens. Our analyses of public CRISPR screens suggest protein interaction networks, when integrated with gene expression or histone marks, are highly predictive of gene essentiality. Meanwhile, the quality of CRISPR and shRNA screen results can be significantly enhanced through network neighbor information. We also found network neighbor information to be very informative on prioritizing ChIP-seq target genes and survival indicator genes from tumor profiling. Thus, our study provides a general method for gene essentiality analysis in functional genomic experiments ( http://nest.dfci.harvard.edu ).},
}

@article{596466,
  author   = {Jiang, Peng and Singh, Mona},
  title    = {{CCAT}: {Combinatorial Code Analysis Tool} for transcriptional regulation},
  journal  = nar,
  volume   = {42},
  number   = {5},
  pages    = {2833--2847},
  year     = {2014},
  month    = mar,
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkt1302},
  keywords = {Animals, Binding Sites, Drosophila melanogaster, Embryo, Nonmammalian, Embryonic Development, Gene Expression Regulation, Gene Regulatory Networks, Genomics, Regulatory Elements, Transcriptional, Software, Transcription Factors, Transcription, Genetic},
  abstract = {Combinatorial interplay among transcription factors (TFs) is an important mechanism by which transcriptional regulatory specificity is achieved. However, despite the increasing number of TFs for which either binding specificities or genome-wide occupancy data are known, knowledge about cooperativity between TFs remains limited. To address this, we developed a computational framework for predicting genome-wide co-binding between TFs (CCAT, Combinatorial Code Analysis Tool), and applied it to Drosophila melanogaster to uncover cooperativity among TFs during embryo development. Using publicly available TF binding specificity data and DNaseI chromatin accessibility data, we first predicted genome-wide binding sites for 324 TFs across five stages of D. melanogaster embryo development. We then applied CCAT in each of these developmental stages, and identified from 19 to 58 pairs of TFs in each stage whose predicted binding sites are significantly co-localized. We found that nearby binding sites for pairs of TFs predicted to cooperate were enriched in regions bound in relevant ChIP experiments, and were more evolutionarily conserved than other pairs. Further, we found that TFs tend to be co-localized with other TFs in a dynamic manner across developmental stages. All generated data as well as source code for our front-to-end pipeline are available at http://cat.princeton.edu.},
}

@article{596411,
  author   = {Zheng, Xiaoqi and Zhao, Qian and Wu, Hua-Jun and Li, Wei and Wang, Haiyun and Meyer, Clifford A and Qin, Qian Alvin and Han Xu and Zang, Chongzhi and Jiang, Peng and Li, Fuqiang and Hou, Yong and He, Jianxing and Wang, Jun and Wang, Jun and Zhang, Peng and Zhang, Yong and Liu, Xiaole Shirley},
  title    = {{MethylPurify}: tumor purity deconvolution and differential methylation detection from single tumor {DNA} methylomes},
  journal  = genomebiol,
  volume   = {15},
  number   = {8},
  pages    = {419},
  year     = {2014},
  month    = aug,
  issn     = {1474-760X},
  doi      = {10.1186/s13059-014-0419-x},
  keywords = {Adenocarcinoma, Algorithms, Breast, Breast Neoplasms, Cell Line, Computational Biology, DNA Methylation, Female, Genetic Heterogeneity, Humans, Lung Neoplasms, Models, Statistical, Sequence Analysis, DNA},
  abstract = {We propose a statistical algorithm MethylPurify that uses regions with bisulfite reads showing discordant methylation levels to infer tumor purity from tumor samples alone. MethylPurify can identify differentially methylated regions (DMRs) from individual tumor methylome samples, without genomic variation information or prior knowledge from other datasets. In simulations with mixed bisulfite reads from cancer and normal cell lines, MethylPurify correctly inferred tumor purity and identified over 96\% of the DMRs. From patient data, MethylPurify gave satisfactory DMR calls from tumor methylome samples alone, and revealed potential missed DMRs by tumor to normal comparison due to tumor heterogeneity.},
}

@comment{Entry 596511 is a doctoral dissertation that the original export filed as an article with the journal set to Ph.D. Dissertation. It is kept as misc because the degree-granting school is not recorded here; switch to phdthesis and add the school field once it is confirmed.}

@misc{596511,
  author       = {Jiang, Peng},
  title        = {Combinatorial code analysis for understanding biological regulation},
  howpublished = {{Ph.D.} Dissertation},
  year         = {2013},
  abstract     = {An important mechanism to achieve regulatory specificity in diverse biological processes is through the combinatorial interplay between different regulators, such as amongst transcription factors (TFs) during transcriptional regulation or between RNA binding proteins (RBPs) and microRNAs (miRNAs) during transcript degradation control. To advance our understanding of combinatorial regulation, we developed a computational pipeline called CCAT (Combinatorial Code Analysis Tool) for predicting genome-wide co-binding between biological regulators. In the first part of this thesis, we applied CCAT to the D. melanogaster genome to uncover cooperativity amongst TFs during embryo development. Using publicly available TF binding specificity data and DNaseI chromatin accessibility data, we first predicted genome-wide binding sites for 324 TFs across five stages of D. melanogaster embryo development. We then applied CCAT in each of these developmental stages, and identified from 20 to 60 pairs of TFs in each stage whose predicted binding sites are significantly co-localized. Several of the co-binding pairs we found correspond to TFs that are known to work together. Further, pairs of binding sites predicted to cooperate were found to be consistently enriched in their evolutionarily conservation and their tendency to be found in regions bound in relevant ChIP experiments. Finally, we found that TFs tend to be co-localized with other TFs in a dynamic manner across developmental stages. In the second part of this thesis, we applied CCAT to explore whether RBPs and miRNAs cooperate to promote transcript decay. We concentrated on five highly conserved RBP motifs in human 3{\textquoteright}UTRs. A specific group of miRNA recognition sites were enriched within 50 nts from the RBP recognition sites for PUM and UAUUUAU. The presence of both a PUM recognition site and a recognition site for preferentially co-occurring miRNAs was associated with faster decay of the associated transcripts. For PUM and its co-occurring miRNAs, binding of the RBP to its recognition sites was predicted to release nearby miRNA recognition sites from RNA secondary structures. Overall, our CCAT analyses suggest that a specific set of RBPs and miRNAs work together to affect transcript decay, with the release of miRNA recognition sites via RBP binding as one possible model of cooperativity. Our pipeline provides a general tool for identifying combinatorial cooperativity in biological regulation. All generated data as well as source code are available at: http://cat.princeton.edu.},
}

@article{596446,
  author   = {Jiang, Peng and Singh, Mona and Coller, Hilary A},
  title    = {Computational assessment of the cooperativity between {RNA} binding proteins and {MicroRNAs} in Transcript Decay},
  journal  = {PLoS Computational Biology},
  volume   = {9},
  number   = {5},
  pages    = {e1003075},
  year     = {2013},
  issn     = {1553-7358},
  doi      = {10.1371/journal.pcbi.1003075},
  keywords = {Amino Acid Motifs, Animals, Computational Biology, Humans, Mice, MicroRNAs, Models, Genetic, RNA Stability, RNA-Binding Proteins},
  abstract = {Transcript degradation is a widespread and important mechanism for regulating protein abundance. Two major regulators of transcript degradation are RNA Binding Proteins (RBPs) and microRNAs (miRNAs). We computationally explored whether RBPs and miRNAs cooperate to promote transcript decay. We defined five RBP motifs based on the evolutionary conservation of their recognition sites in 3{\textquoteright}UTRs as the binding motifs for Pumilio (PUM), U1A, Fox-1, Nova, and UAUUUAU. Recognition sites for some of these RBPs tended to localize at the end of long 3{\textquoteright}UTRs. A specific group of miRNA recognition sites were enriched within 50 nts from the RBP recognition sites for PUM and UAUUUAU. The presence of both a PUM recognition site and a recognition site for preferentially co-occurring miRNAs was associated with faster decay of the associated transcripts. For PUM and its co-occurring miRNAs, binding of the RBP to its recognition sites was predicted to release nearby miRNA recognition sites from RNA secondary structures. The mammalian miRNAs that preferentially co-occur with PUM binding sites have recognition seeds that are reverse complements to the PUM recognition motif. Their binding sites have the potential to form hairpin secondary structures with proximal PUM binding sites that would normally limit RISC accessibility, but would be more accessible to miRNAs in response to the binding of PUM. In sum, our computational analyses suggest that a specific set of RBPs and miRNAs work together to affect transcript decay, with the rescue of miRNA recognition sites via RBP binding as one possible mechanism of cooperativity.},
}

@article{596451,
  author   = {Jiang, Peng and Coller, Hilary},
  title    = {Functional interactions between {microRNAs} and {RNA} binding proteins},
  journal  = {MicroRNA},
  volume   = {1},
  number   = {1},
  pages    = {70--79},
  year     = {2012},
  issn     = {2211-5374},
  keywords = {Animals, Binding Sites, Gene Expression Regulation, Genome, Humans, MicroRNAs, Models, Molecular, RNA Stability, RNA-Binding Proteins},
  abstract = {Ensuring the appropriate spatial-temporal control of protein abundance requires careful control of transcript levels. This process is regulated at many steps, including the rate at which transcripts decay. microRNAs (miRNAs) and RNA Binding Proteins (RBPs) represent two important regulators of transcript degradation. We review here recent literature that suggests these two regulators of transcript decay may functionally interact. Some studies have reported an excess of miRNA binding sites surrounding the positions at which RBPs bind. Experimental reports focusing on a particular transcript have identified instances in which RBPs and miRNAs compete for the same target sites, and instances in which the binding of a RBP makes a miRNA recognition site more accessible to the RISC complex. Further, miRNAs and RBPs use similar enzymes for degradation of target transcripts and the degradation of the target transcripts occurs in similar subcellular compartments. In addition to miRNA-RBP interactions involving transcript decay, RBPs have also been reported to facilitate the processing of pri-miRNAs to their final form. We summarize here several possible mechanisms through which miRNA-RBP interactions may occur.},
}

@article{596391,
  author   = {Pane, Attilio and Jiang, Peng and Zhao, Dorothy Yanling and Singh, Mona and Sch{\"u}pbach, Trudi},
  title    = {The {Cutoff} protein regulates {piRNA} cluster expression and {piRNA} production in the {Drosophila} germline},
  journal  = {The EMBO Journal},
  volume   = {30},
  number   = {22},
  pages    = {4601--4615},
  year     = {2011},
  month    = nov,
  issn     = {1460-2075},
  doi      = {10.1038/emboj.2011.334},
  keywords = {Animals, Base Sequence, Chromosomal Proteins, Non-Histone, DNA Transposable Elements, Drosophila melanogaster, Drosophila Proteins, Gene Expression Regulation, Developmental, Gene Silencing, Germ Cells, High-Throughput Nucleotide Sequencing, Mutation, Nuclear Proteins, RNA, Small Interfering, Saccharomyces cerevisiae Proteins, Sequence Analysis, DNA, Stem Cells, Transcription, Genetic},
  abstract = {In a broad range of organisms, Piwi-interacting RNAs (piRNAs) have emerged as core components of a surveillance system that protects the genome by silencing transposable and repetitive elements. A vast proportion of piRNAs is produced from discrete genomic loci, termed piRNA clusters, which are generally embedded in heterochromatic regions. The molecular mechanisms and the factors that govern their expression are largely unknown. Here, we show that Cutoff (Cuff), a Drosophila protein related to the yeast transcription termination factor Rai1, is essential for piRNA production in germline tissues. Cuff accumulates at centromeric/pericentromeric positions in germ-cell nuclei and strongly colocalizes with the major heterochromatic domains. Remarkably, we show that Cuff is enriched at the dual-strand piRNA cluster 1/42AB and is likely to be involved in regulation of transcript levels of similar loci dispersed in the genome. Consistent with this observation, Cuff physically interacts with the Heterochromatin Protein 1 (HP1) variant Rhino (Rhi). Our results unveil a link between Cuff activity, heterochromatin assembly and piRNA cluster expression, which is critical for stem-cell and germ-cell development in Drosophila.},
}

@article {596386, title = {SPICi: a fast clustering algorithm for large biological networks}, journal = {Bioinformatics}, volume = {26}, number = {8}, year = {2010}, month = {2010 Apr 15}, pages = {1105-11}, abstract = {MOTIVATION: Clustering algorithms play an important role in the analysis of biological networks, and can be used to uncover functional modules and obtain hints about cellular organization. While most available clustering algorithms work well on biological networks of moderate size, such as the yeast protein physical interaction network, they either fail or are too slow in practice for larger networks, such as functional networks for higher eukaryotes. Since an increasing number of larger biological networks are being determined, the limitations of current clustering approaches curtail the types of biological network analyses that can be performed. RESULTS: We present a fast local network clustering algorithm SPICi. SPICi runs in time O(V log V+E) and space O(E), where V and E are the number of vertices and edges in the network, respectively. 
We evaluate SPICi{\textquoteright}s performance on several existing protein interaction networks of varying size, and compare SPICi to nine previous approaches for clustering biological networks. We show that SPICi is typically several orders of magnitude faster than previous approaches and is the only one that can successfully cluster all test networks within very short time. We demonstrate that SPICi has state-of-the-art performance with respect to the quality of the clusters it uncovers, as judged by its ability to recapitulate protein complexes and functional modules. Finally, we demonstrate the power of our fast network clustering algorithm by applying SPICi across hundreds of large context-specific human networks, and identifying modules specific for single conditions. AVAILABILITY: Source code is available under the GNU Public License at http://compbio.cs.princeton.edu/spici.}, keywords = {Algorithms, Cluster Analysis, Databases, Protein, Metabolic Networks and Pathways, Proteins, Software}, issn = {1367-4811}, doi = {10.1093/bioinformatics/btq078}, author = {Jiang, Peng and Singh, Mona} }