...
 
Commits (1415)
con/
### Java template
*.class
......@@ -7,10 +9,13 @@
*.json
*.csv
pw
# Package Files #
*.jar
*.war
*.ear
*.zip
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
......@@ -19,10 +24,16 @@ hs_err_pid*
lib/*
out/*
.idea/*
.idea/workspace.xml
backend/.idea/*
frontend/.idea/*
.idea
target
*.svg
*.gif
*.pdf
log
out/
src/main/resources/static/fonts/
<component name="ArtifactManager">
<artifact type="jar" name="MultiGenBrowser:jar">
<output-path>$PROJECT_DIR$/out/artifacts/MultiGenBrowser_jar</output-path>
<root id="archive" name="MultiGenBrowser.jar">
<element id="module-output" name="MultiGenBrowser" />
</root>
</artifact>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<resourceExtensions />
<wildcardResourcePatterns>
<entry name="!?*.java" />
<entry name="!?*.form" />
<entry name="!?*.class" />
<entry name="!?*.groovy" />
<entry name="!?*.scala" />
<entry name="!?*.flex" />
<entry name="!?*.kt" />
<entry name="!?*.clj" />
<entry name="!?*.aj" />
</wildcardResourcePatterns>
<annotationProcessing>
<profile default="true" name="Default" enabled="false">
<processorPath useClasspath="true" />
</profile>
<profile default="false" name="Annotation profile for MultiGenBrowser" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<processorPath useClasspath="true" />
<module name="MultiGenBrowser" />
</profile>
</annotationProcessing>
<bytecodeTargetLevel>
<module name="MultiGenBrowser" target="1.8" />
</bytecodeTargetLevel>
</component>
</project>
\ No newline at end of file
<component name="CopyrightManager">
<settings default="" />
</component>
\ No newline at end of file
<html>Simple <b>Java</b> application that includes a class with <code>main()</code> method</html>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$" charset="UTF-8" />
<file url="PROJECT" charset="UTF-8" />
</component>
</project>
\ No newline at end of file
<component name="libraryTable">
<library name="commons-math3-3.5">
<CLASSES>
<root url="jar://$PROJECT_DIR$/lib/commons-math3-3.5/commons-math3-3.5.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="EntryPointsManager">
<entry_points version="2.0" />
</component>
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectKey">
<option name="state" value="project://e2804f05-5315-4fc6-a121-c522a6c26470" />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" assert-keyword="true" jdk-15="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/MultiGenBrowser.iml" filepath="$PROJECT_DIR$/MultiGenBrowser.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<template>
<input-field default="com.company">IJ_BASE_PACKAGE</input-field>
</template>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
# Enhort: Genomic Position Profiling
The rise of high-throughput methods in genomic research greatly expanded our
knowledge about the functionality of the genome. At the same time, the amount
of available genomic position data increased massively, e.g., through genome-wide
profiling of protein binding, virus integration or DNA methylation.
However, there is no specialized software to investigate integration site profiles
of virus integration or transcription factor binding sites by correlating the sites
with the diversity of available genomic annotations.
Here we present Enhort, a user-friendly software tool for relating large sets of
genomic positions to a variety of annotations. It functions as a statistics based
genome browser, not focused on a single locus but analyzing many genomic positions
simultaneously. Enhort provides comprehensive yet easy-to-use methods for statistical
analysis, visualization, and the adjustment of background models according to
experimental conditions and scientific questions.
# Availability
Enhort is publicly available online at www.enhort.mni.thm.de and published under GNU General Public License.
The application was published in 2019: https://peerj.com/articles/cs-198/
# Use pre-built
A separate SETUPINSTR.md file is available containing instructions to run Enhort on your own infrastructure.
# Build
A JDK with Java 8 or higher is needed, as well as Maven.
Build steps:
- Download the sources from https://git.thm.de/mmnz21/Enhort
- cd into the enhortlib folder
- run 'mvn package' to build the library
- run the following to add the library to the local .m2 folder and repository:
mvn install:install-file -Dfile=Enhortlib-1.01.jar -DpomFile=pom.xml
- cd back and into the /backend
- run 'mvn package -Dmaven.test.skip=true'
- Tests are skipped because several test-files are needed to run them
- cd back and into the /frontend directory
- run 'mvn package'
- go up one directory
- rename both jars with:
mv backend/target/Enhort-1.01-jar-with-dependencies.jar enhort.jar
mv frontend/target/Enhort-1.01-jar frontend.jar
- continue with the set up as described in the SETUPINSTR.md file
# VM
There is a VirtualBox image with Enhort set up for reference and to test with your own data:
https://zenodo.org/record/2597397
https://doi.org/10.5281/zenodo.2597397
DOI: 10.5281/zenodo.2597397
# Basic
The following guide is written for a Linux-based system. Enhort runs on Java 8 and needs an SQLite database to store information about available tracks.
It is recommended to download and use the pre-built database.
For full usage it is recommended to have a server with about 32 GB of memory for the current database. However, Enhort is capable of being run on a small PC or server with fewer annotation tracks.
Enhort is built to run on two different servers, a computation back-end server and a visible front-end server. However, it is possible to run both on the same server; in that case use 127.0.0.1 as the IP address.
# Database
### Use pre-built database
Download the minimal SQlite database and the corresponding .bed-tracks from
https://homepages.thm.de/~mmnz21/minimal.db
https://homepages.thm.de/~mmnz21/enhort_bed_files_hg19.tar.gz
Save the database and unpack .bed-files in a known directory.
### Build your own database
TODO
# Server setup
Download the enhort.jar from
https://homepages.thm.de/~mmnz21/enhort.jar
### Run the server
The server is run with the following command:
java -jar -Xmx4g -XX:StringTableSize=1000003 /path/to/server/jar/enhort.jar --data-path /path/to/data/directory/ --db /path/to/database.db [-p PORTNUMBER --custom /path/to/custom/tracks/without/db]
- The -Xmx4g flag raises the available memory to 4 GB, please specify your available memory here.
- The StringTableSize improves start up speed for loading the data
- -p sets the port to listen on. Default is: 42412
- The server takes about 2 minutes to start; when the message "Still loading track files. Stopping now" appears, the server is up and running
# Frontend setup
Download the frontend archive containing the .jar and the contig sizes for hg19 and hg38 from
https://homepages.thm.de/~mmnz21/frontend.tar.gz
Create an empty directory /logs in the user's home directory for log files
The frontend is run with the following command:
java -Xmx2g -Dmultipart.maxFileSize=20MB -Dmultipart.maxRequestSize=20MB -Dspring.profiles.active=production -jar frontend.jar --ip 127.0.0.1 --contigs-path /path/to/contig/size/files
- You should specify the maximum allowed upload file size
- You need to specify the address of the backend server. If both are on the same server use 127.0.0.1
- The path to the contig sizes file (which is included in the frontend-archive)
- A usage statistics file is written at the given location of the statistics file or at /tmp
# Bed test file
You can get a custom-built test file containing some integration sites for hg19 here:
https://homepages.thm.de/~mmnz21/test.bed
# Misc
### Encryption on your own server
TODO
### Package Frontend.tar.gz
tar cvf frontend.tar frontend.jar ../../con/
#### Content:
- 5 frontend.jar
- 6 con/
- 7 con/contigs_GRCh38
- 8 con/contigs_hg19
- 9 con/contigs_hg18
### Known issues
- The sample button does not work on custom systems because the .bed-file used for the sample run is not available
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="src" level="project" />
<orderEntry type="library" name="Maven: de.thm.enhort.lib:Enhortlib:1.01" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.25" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.6" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.4" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-exec:1.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.11" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
<orderEntry type="library" name="Maven: org.json:json:20151123" level="project" />
<orderEntry type="library" name="Maven: org.xerial:sqlite-jdbc:3.20.0" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.2.3" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.2.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-all:1.10.19" level="project" />
</component>
</module>
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
Z+CcpaYJWpGpLhhtxBe6T2MbrORUb27R
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>de.thm.enhortbackend</groupId>
<artifactId>Enhort</artifactId>
<version>1.01</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.6</version>
<configuration>
<!-- get all project dependencies -->
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<!-- MainClass in the manifest makes the jar executable -->
<archive>
<manifest>
<mainClass>de.thm.run.BackendServer</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<!-- bind to the packaging phase -->
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.12</version>
<configuration>
<runOrder>random</runOrder>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>de.thm.enhort.lib</groupId>
<artifactId>Enhortlib</artifactId>
<version>1.01</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20151123</version>
</dependency>
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.20.0</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.3</version>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.10.19</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>net.sourceforge.argparse4j</groupId>
<artifactId>argparse4j</artifactId>
<version>0.8.1</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
Manifest-Version: 1.0
Main-Class: de.thm.run.Main
Main-Class: de.thm.spring.run.Webinterface
// Copyright (C) 2018 Michael Menzel
//
// This file is part of Enhort. <https://enhort.mni.thm.de>.
//
// Enhort is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Enhort is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Enhort. If not, see <https://www.gnu.org/licenses/>.
package de.thm.backgroundModel;
import de.thm.exception.CovariatesException;
import de.thm.exception.TrackTypeNotAllowedExcpetion;
import de.thm.genomeData.tracks.*;
import de.thm.misc.Genome;
import de.thm.positionData.Sites;
import java.util.List;
import java.util.stream.Collectors;
/**
* <p>
* Factory for Background models. Start here to generate a background model based on a count of sites or a list or a single
* covariant.
* </p>
* Created by Michael Menzel on 23/2/16.
*/
public final class BackgroundModelFactory {

    /** Upper limit (inclusive) for track lists that contain anything other than in/out tracks. */
    private static final int maxCovariants = 4;

    /** Limit for track lists that consist solely of in/out tracks (compared with {@code <} below). */
    private static final int maxCovariantsInOutOnly = 10;

    /**
     * Creates a random background model of the given size.
     * Requests for fewer than 10000 positions are raised to 10000.
     *
     * @param assembly - assembly the random positions are created for
     * @param positionCount - count of random positions to create.
     * @return background model as sites object.
     */
    public static Sites createBackgroundModel(Genome.Assembly assembly, int positionCount) {
        return RandomBackgroundModel.create(assembly, Math.max(positionCount, 10000));
    }

    /**
     * Creates a background model based on one track as covariant, the given sites and a minimum of sites to create.
     * The concrete model is chosen by the runtime type of the track.
     *
     * @param track - covariant track
     * @param sites - sites to set the probabilities for the background positions
     * @param minSites - minimum expected sites count
     * @param influence - influence of the model on positions (between 0 and 1); only forwarded for scored tracks
     *
     * @return background model as sites object.
     * @throws TrackTypeNotAllowedExcpetion - if the track is of none of the handled types
     */
    private static Sites createBackgroundModel(final Track track, final Sites sites, final int minSites, final double influence) throws TrackTypeNotAllowedExcpetion {
        if (track instanceof InOutTrack) {
            return SingleTrackBackgroundModel.create((InOutTrack) track, sites, minSites);
        }
        if (track instanceof ScoredTrack) {
            return ScoreBackgroundModel.scoreBackgroundModel((ScoredTrack) track, sites, minSites, influence);
        }
        if (track instanceof NamedTrack) {
            return NamedBackgroundModel.create(sites, minSites, (NamedTrack) track);
        }
        if (track instanceof DistanceTrack) {
            // minSites is not used here; 200 is passed as the standard deviation of the distance model
            return DistanceBackgroundModel.create((DistanceTrack) track, sites, 200);
        }
        if (track instanceof StrandTrack) {
            return RandomBackgroundModel.create(track.getAssembly(), minSites); // TODO add missing strandTrack BG model
        }

        throw new TrackTypeNotAllowedExcpetion("Type of " + track + " unkonwn");
    }

    /**
     * Creates a background model with a given list of covariants and sites and a minimum of sites to create.
     * <ul>
     * <li>empty list: random background model with the position count of the sites</li>
     * <li>one track: single-track model chosen by the type of that track</li>
     * <li>only in/out tracks: multi-track model</li>
     * <li>anything else (at most {@code maxCovariants} tracks): score based model; in/out and named
     * tracks are converted to scored tracks first</li>
     * </ul>
     *
     * @param trackList - list of covariants. If the list is of size one createBackgroundModel(trackList.get(0), sites, minSites, smooth) is called
     * @param sites - sites to set the probabilities for the background positions
     * @param minSites - minimum expected sites count
     * @param smooth - forwarded unchanged to the single-track and score based models
     * @return background model as sites object.
     * @throws CovariatesException - if there are too many covariants
     * @throws TrackTypeNotAllowedExcpetion - if a single given track is of an unsupported type
     */
    public static Sites createBackgroundModel(List<Track> trackList, Sites sites, int minSites, double smooth) throws CovariatesException, TrackTypeNotAllowedExcpetion {
        if (trackList.isEmpty()) {
            return createBackgroundModel(sites.getAssembly(), sites.getPositionCount());
        }

        if (trackList.size() == 1) {
            return createBackgroundModel(trackList.get(0), sites, minSites, smooth);
        }

        if (trackList.stream().allMatch(i -> i instanceof InOutTrack)) {
            // NOTE(review): this bound is exclusive while the mixed-type bound below is inclusive,
            // so a list of exactly maxCovariantsInOutOnly tracks is rejected - confirm this is intended.
            if (trackList.size() < maxCovariantsInOutOnly) {
                return MultiTrackBackgroundModel.create(trackList, sites, minSites);
            }
            throw new CovariatesException("Too many covariants: " + trackList.size() + ". Max " + maxCovariantsInOutOnly + " are allowed");
        }

        if (trackList.size() > maxCovariants) {
            // report the limit that is actually enforced for mixed track lists
            throw new CovariatesException("Too many covariants. " + trackList.size() + ". Only " + maxCovariants + " are allowed");
        }

        if (trackList.stream().allMatch(i -> i instanceof ScoredTrack)) {
            List<ScoredTrack> newList = trackList.stream().map(i -> (ScoredTrack) i).collect(Collectors.toList());
            return ScoreBackgroundModel.create(newList, sites, minSites, smooth);
        }

        // mixed list: scored tracks first, then converted in/out tracks, then converted named tracks.
        // Tracks of any other type are not part of the resulting list.
        List<ScoredTrack> scoredIntervals = trackList.stream()
                .filter(i -> i instanceof ScoredTrack)
                .map(i -> (ScoredTrack) i)
                .collect(Collectors.toList());

        //convert all non score intervals to score interval
        scoredIntervals.addAll(trackList.stream()
                .filter(i -> i instanceof InOutTrack)
                .map(i -> (InOutTrack) i)
                .map(Tracks::cast)
                .collect(Collectors.toList()));

        scoredIntervals.addAll(trackList.stream()
                .filter(i -> i instanceof NamedTrack)
                .map(i -> (NamedTrack) i)
                .map(Tracks::cast)
                .collect(Collectors.toList()));

        return ScoreBackgroundModel.create(scoredIntervals, sites, minSites, smooth);
    }
}
// Copyright (C) 2018 Michael Menzel
//
// This file is part of Enhort. <https://enhort.mni.thm.de>.
//
// Enhort is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Enhort is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Enhort. If not, see <https://www.gnu.org/licenses/>.
package de.thm.backgroundModel;
import de.thm.calc.Distances;
import de.thm.genomeData.tracks.DistanceTrack;
import de.thm.genomeData.tracks.InOutTrack;
import de.thm.genomeData.tracks.Track;
import de.thm.genomeData.tracks.Tracks;
import de.thm.positionData.Sites;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
* Calculates a distances background model for a single distance track
*
* Created by menzel on 10/10/16.
*/
class DistanceBackgroundModel {

    /**
     * Builds a background model for a distance track.
     * Inside positions are placed around randomly chosen track starts using the observed
     * site distances (only distances strictly between -5000 and 5000 are used); the remaining
     * positions are drawn from the inverted, range-converted track.
     *
     * @param track - distance track used as covariant
     * @param sites - measured sites the distances are taken from
     * @param standardDeviation - standard deviation of the normal jitter added to each inside position
     * @return background model with sorted positions for the assembly of the sites
     */
    static BackgroundModel create(DistanceTrack track, Sites sites, int standardDeviation){
        //generate positions inside
        List<Long> distHist = generateDistanceHist(track, sites);
        int count = distHist.size();
        List<Long> positions = generatePositions(distHist, track, count, standardDeviation);

        //generate outside positions
        InOutTrack ousideTrack = Tracks.invert(Tracks.convertByRange(track, 5000));
        positions.addAll(SingleTrackBackgroundModel.randPositions(sites.getPositionCount() - distHist.size(), ousideTrack));

        Collections.sort(positions);

        return new BackgroundModel(positions, sites.getAssembly());
    }

    /**
     * Places positions relative to randomly picked track starts.
     * Each attempt picks a start that has both a predecessor and a successor, decides at random
     * whether to go upstream or downstream, and consumes the first remaining distance that stays
     * within half the gap to the neighbouring start. An attempt that finds no fitting distance
     * adds nothing, so fewer than {@code count} positions may be returned.
     *
     * @param distances - observed distances (negative values are treated as upstream)
     * @param track - track whose starts are used as anchors
     * @param count - upper loop bound; the loop performs {@code count + 1} attempts
     * @param standardDeviation - standard deviation of the normal jitter
     * @return generated positions (unsorted); empty if the track has fewer than three starts
     */
    private static List<Long> generatePositions(List<Long> distances, Track track, int count, int standardDeviation) {
        List<Long> positions = new ArrayList<>();

        long[] starts = track.getStarts();
        if (starts.length < 3) {
            // an anchor needs a neighbour on both sides; without this guard the
            // neighbour lookups below run out of bounds
            return positions;
        }

        MersenneTwister rand = new MersenneTwister();

        List<Long> upstream = distances.parallelStream().filter(i -> i < 0).sorted().collect(Collectors.toList());
        List<Long> downstream = distances.parallelStream().filter(i -> i >= 0).sorted().collect(Collectors.toList());

        // reverse downstream list to begin with the farthest distances to the position. upstream is already in order
        Collections.reverse(downstream);

        NormalDistribution nd = new NormalDistribution(0, standardDeviation);

        for (int i = 0; i <= count; i++) {
            //get random start, never the first or the last one:
            int id = 1 + (int) Math.floor(rand.nextDouble() * (starts.length - 2));
            long start = starts[id];

            if (rand.nextBoolean()) { // set the new site up or downstream of the random position by random
                long distPrev = (starts[id - 1] - start) / 2;

                for (int k = 0; k < upstream.size(); k++) {
                    long dist = upstream.get(k);
                    if (dist > distPrev) {
                        long f = (long) nd.sample();
                        positions.add(start + dist + f); //dist is negative in this branch
                        upstream.remove(k); // each observed distance is used once
                        break;
                    }
                }
            } else {
                long distFol = (starts[id + 1] - start) / 2;

                for (int k = 0; k < downstream.size(); k++) {
                    long dist = downstream.get(k);
                    if (dist < distFol) {
                        long f = (long) nd.sample();
                        positions.add(start + dist + f); //dist is zero or positive in this branch
                        downstream.remove(k); // each observed distance is used once
                        break;
                    }
                }
            }
        }

        return positions;
    }

    /**
     * Collects the distances reported by {@code Distances.distancesToNext} for the given track and
     * sites and keeps those strictly between -5000 and 5000.
     *
     * @param track - track to measure against
     * @param sites - sites to measure
     * @return filtered list of distances
     */
    private static List<Long> generateDistanceHist(Track track, Sites sites) {
        Distances dist = new Distances();
        List<Long> distances = new ArrayList<>(dist.distancesToNext(track, sites));

        return distances.parallelStream().filter(i -> i < 5000 && i > -5000).collect(Collectors.toList());
    }
}
// Copyright (C) 2018 Michael Menzel
//
// This file is part of Enhort. <https://enhort.mni.thm.de>.
//
// Enhort is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Enhort is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Enhort. If not, see <https://www.gnu.org/licenses/>.
package de.thm.backgroundModel;
import de.thm.genomeData.tracks.Track;
import de.thm.genomeData.tracks.TrackFactory;
import de.thm.genomeData.tracks.Tracks;
import de.thm.misc.Genome;
import de.thm.positionData.Sites;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
* Implements a background model which can use many covariants.
* <p>
* Created by Michael Menzel on 13/1/16.
*/
class MultiTrackBackgroundModel {

    /**
     * Creates a background model from several covariant tracks.
     * An appearance table is filled from the tracks and the input positions and then used
     * to draw the random positions.
     *
     * @param tracks - covariants
     * @param inputPositions - positions to match against
     * @param minSites - passed to the appearance table
     * @return background model for the assembly of the first track
     */
    static BackgroundModel create(List<Track> tracks, Sites inputPositions, int minSites) {
        AppearanceTable table = new AppearanceTable(minSites);
        table.fillTable(tracks, inputPositions);

        List<Long> generated = new ArrayList<>();
        generated.addAll(randPositions(table, tracks));

        return new BackgroundModel(generated, tracks.get(0).getAssembly());
    }

    /**
     * Draws random positions for every entry of the appearance table.
     * The appearance table has to be made of the given tracks.
     * All candidate regions are intersected with the "Contigs" track of the assembly of the first track.
     *
     * @param appearanceTable - table of appearance counts
     * @param tracks - tracks the table was built from
     * @return sorted positions, distributed according to the appearance counts
     */
    static Collection<Long> randPositions(AppearanceTable appearanceTable, List<Track> tracks) {
        final String outsideKey = "[]";

        Track contigs = TrackFactory.getInstance().getTrackByName("Contigs", tracks.get(0).getAssembly());
        List<Long> result = new ArrayList<>();

        // one round per table key; the outside key is handled after the loop
        for (String key : appearanceTable.getKeySet()) {
            if (!key.equals(outsideKey)) {
                int wanted = appearanceTable.getAppearance(key);

                List<Track> included = appearanceTable.translate(key, tracks);
                List<Track> excluded = appearanceTable.translateNegative(tracks, key);

                // the excluded tracks take part in the intersection in inverted form
                List<Track> invertedExcluded = excluded.stream().map(Tracks::invert).collect(Collectors.toList());
                included.addAll(invertedExcluded);

                Track region = Tracks.intersect(included);

                //TODO check if sum of intervals is too small and add some pseudocount
                result.addAll(SingleTrackBackgroundModel.randPositions(wanted, Tracks.intersect(region, contigs)));
            }
        }

        // outside positions: intersection of all inverted tracks
        int outsideWanted = appearanceTable.getAppearance(outsideKey);
        Track outsideRegion = Tracks.intersect(tracks.stream().map(Tracks::invert).collect(Collectors.toList()));
        result.addAll(SingleTrackBackgroundModel.randPositions(outsideWanted, Tracks.intersect(outsideRegion, contigs)));

        Collections.sort(result);

        return result;
    }
}
// Copyright (C) 2018 Michael Menzel
//
// This file is part of Enhort. <https://enhort.mni.thm.de>.
//
// Enhort is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Enhort is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Enhort. If not, see <https://www.gnu.org/licenses/>.
package de.thm.backgroundModel;
import de.thm.calc.Intersect;
import de.thm.calc.TestTrackResult;
import de.thm.genomeData.tracks.*;
import de.thm.positionData.Sites;
import org.apache.commons.lang3.ArrayUtils;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static de.thm.backgroundModel.ScoreBackgroundModel.generatePositionsByProbability;
/**
 * Background model for a single named track.
 */
class NamedBackgroundModel {

    /**
     * Creates a background model whose positions follow the distribution of the sites over the
     * interval names of the given track.
     *
     * @param sites - measured sites
     * @param count - requested number of positions; raised to the position count of the sites if smaller
     * @param track - named track used as covariant
     * @return background model containing only positions that survive the intersection with the "Contigs" track
     */
    static BackgroundModel create(Sites sites, int count, NamedTrack track) {
        Intersect calc = new Intersect();
        TestTrackResult result = calc.searchSingleInterval(track, sites);

        // probability per name: hits for that name divided by the number of sites
        Map<String, Double> nameProbability = new HashMap<>();
        for (String name : result.getResultNames().keySet()) {
            nameProbability.put(name, result.getResultNames().get(name).doubleValue() / sites.getPositionCount());
        }

        // summed interval length per name
        int intervalCount = track.getStarts().length;
        Map<String, Long> totalLength = new HashMap<>();
        for (int i = 0; i < intervalCount; i++) {
            long span = track.getEnds()[i] - track.getStarts()[i];
            totalLength.merge(track.getIntervalName()[i], span, Long::sum);
        }

        // probability per interval: share of this interval in the total length of its name,
        // times the probability of the name. Names without hits get 0.
        double[] intervalProbability = new double[intervalCount];
        for (int i = 0; i < intervalCount; i++) {
            String name = track.getIntervalName()[i];
            double span = track.getEnds()[i] - track.getStarts()[i];
            intervalProbability[i] = nameProbability.getOrDefault(name, 0.0) * (span / totalLength.get(name));
        }

        ScoredTrack probTrack = TrackFactory.getInstance().createScoredTrack(
                track.getStarts(),
                track.getEnds(),
                track.getIntervalName(),
                intervalProbability,
                track.getName() + " probabilities for background model",
                track.getDescription(),
                sites.getAssembly());

        count = Math.max(sites.getPositionCount(), count);
        count = (int) (count * 1.15); // increase count to adjust for contigs filter

        double shareInside = ((double) result.getIn()) / sites.getPositionCount();
        double shareOutside = ((double) result.getOut()) / sites.getPositionCount();

        Track contigs = TrackFactory.getInstance().getTrackByName("Contigs", sites.getAssembly());

        // inside positions by probability, outside positions from the inverted track
        Collection<Long> pos = generatePositionsByProbability(probTrack, (int) (count * shareInside));
        pos.addAll(SingleTrackBackgroundModel.randPositions((int) (count * shareOutside), Tracks.invert(track)));

        // intersect with contigs, filter all positions outside of contigs
        List<Long> candidates = new ArrayList<>(pos);
        Track withinContigs = Tracks.intersect(contigs, Tracks.getTrack(new BackgroundModel(candidates, sites.getAssembly())));

        List<Long> kept = new ArrayList<>(Arrays.asList(ArrayUtils.toObject(withinContigs.getStarts())));

        return new BackgroundModel(kept, sites.getAssembly());
    }
}
// Copyright (C) 2018 Michael Menzel
//
// This file is part of Enhort. <https://enhort.mni.thm.de>.
//
// Enhort is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Enhort is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Enhort. If not, see <https://www.gnu.org/licenses/>.
package de.thm.backgroundModel;
import de.thm.genomeData.tracks.Track;
import de.thm.genomeData.tracks.TrackFactory;
import de.thm.misc.Genome;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.ArrayList;
import java.util.List;
/**
* Background model that creates randomly distributed sites without any covariates.
* <p>
* Created by Michael Menzel on 8/12/15.
*/
class RandomBackgroundModel {

    /**
     * Builds a background model of random positions, each with a random strand.
     * <p>
     * Positions are drawn by {@link SingleTrackBackgroundModel#randPositions(int, Track)}
     * from the "Contigs" track of the assembly. Every drawn position is then
     * assigned '+' or '-' by an independent coin flip.
     *
     * @param assembly - assembly of the generated sites
     * @param count - of sites to be generated
     * @return background model holding the drawn positions and their strands
     */
    static BackgroundModel create(Genome.Assembly assembly, int count) {
        Track contigTrack = contigsOrEmpty(assembly);

        List<Long> positions = new ArrayList<>(SingleTrackBackgroundModel.randPositions(count, contigTrack));
        List<Character> strands = new ArrayList<>();

        // one strand per generated position
        MersenneTwister generator = new MersenneTwister();
        int remaining = positions.size();
        while (remaining-- > 0) {
            if (generator.nextBoolean()) {
                strands.add('+');
            } else {
                strands.add('-');
            }
        }

        return new BackgroundModel(positions, strands, assembly);
    }

    /**
     * Looks up the "Contigs" track for the assembly.
     *
     * @param assembly - assembly whose contigs track is requested
     * @return the contigs track, or an empty track if the lookup throws a RuntimeException
     */
    private static Track contigsOrEmpty(Genome.Assembly assembly) {
        try {
            return TrackFactory.getInstance().getTrackByName("Contigs", assembly);
        } catch (RuntimeException notFound) {
            // contigs track not found
            return TrackFactory.getInstance().createEmptyTrack(assembly);
        }
    }
}
// Copyright (C) 2018 Michael Menzel
//
// This file is part of Enhort. <https://enhort.mni.thm.de>.
//
// Enhort is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Enhort is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Enhort. If not, see <https://www.gnu.org/licenses/>.
package de.thm.backgroundModel;
import java.util.Arrays;
import java.util.Objects;
/**
* Holds a set of scores, one slot per track, that references the intermediate interval in scored background models.
* Created by menzel on 8/23/16.
*/
class ScoreSet {
private final Double[] scoreList; // list of scores
/**
* Constructor for testing
*/
ScoreSet(Double[] values){
scoreList = values;
}
ScoreSet(int size){
scoreList = new Double[size];
}
/**
* Adds a score to the set
*
* @param score - score taken from one of the tracks
* @param i - for which position the score is added. Each track for a set of scoresSets has a number from 0 to the 'count of tracks' - 1
*/
void add(Double score, int i){
if(i < scoreList.length)
scoreList[i] = score;
else
throw new RuntimeException("Index " + i + " out of ScoreSet bounds " + scoreList.length);
}
Double[] getScores() {
return scoreList;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ScoreSet other = (ScoreSet) o;
for(int j = 0; j < scoreList.length; j++){
if(!Objects.equals(this.scoreList[j], other.getScores()[j])){
return false;
}
}
return true;
}
@Override
public int hashCode() {
return Arrays.hashCode(scoreList);
}
}
// Copyright (C) 2018 Michael Menzel
//
// This file is part of Enhort. <https://enhort.mni.thm.de>.
//
// Enhort is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Enhort is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Enhort. If not, see <https://www.gnu.org/licenses/>.
package de.thm.backgroundModel;
import de.thm.calc.Intersect;
import de.thm.calc.TestTrackResult;
import de.thm.genomeData.tracks.InOutTrack;
import de.thm.genomeData.tracks.Track;
import de.thm.genomeData.tracks.TrackFactory;
import de.thm.genomeData.tracks.Tracks;
import de.thm.positionData.Sites;
import org.apache.commons.math3.random.MersenneTwister;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
* Implements the background model sites generation for a single track as covariate.
* Scored or inout intervals are possible.
* <p>
* Created by Michael Menzel on 6/1/16.
*/
class SingleTrackBackgroundModel {

    /**
     * Creates a background model whose sites reproduce the in/out counts the
     * given sites have on the given track.
     * <p>
     * The in and out counts come from {@link Intersect#searchSingleInterval}.
     * Inside positions are drawn from the track intersected with the "Contigs"
     * track, outside positions from the inverted track intersected with the
     * "Contigs" track.
     *
     * @param track - interval to search against
     * @param sites - sites to search
     * @param minSites - if there are fewer sites than this, the in/out counts are scaled by minSites / (number of sites), using integer division
     * @return background model with the sorted generated positions
     */
    static BackgroundModel create(InOutTrack track, Sites sites, int minSites) {
        List<Long> positions = new ArrayList<>();

        Intersect calc = new Intersect();
        TestTrackResult result = calc.searchSingleInterval(track, sites);

        // TODO: factor is wrong, seems to be always 1
        // Integer division truncates, so the factor only exceeds 1 once minSites
        // is at least twice the number of sites.
        // The siteCount > 0 check avoids a division by zero for an empty site set.
        int siteCount = sites.getPositionCount();
        int factor = (siteCount > 0 && siteCount < minSites) ? minSites / siteCount : 1;

        Track contigs = TrackFactory.getInstance().getTrackByName("Contigs", sites.getAssembly());

        positions.addAll(randPositions(result.getIn() * factor, Tracks.intersect(track, contigs)));
        positions.addAll(randPositions(result.getOut() * factor, Tracks.intersect(Tracks.invert(track), contigs)));

        Collections.sort(positions); //sort again here after merging outside and inside positions

        return new BackgroundModel(positions, sites.getAssembly());
    }

    /**
     * Generates random positions that all lie inside the intervals of the given track.
     * <p>
     * Each position p is placed in an interval so that start <= p < end. No contigs
     * filtering happens here; positions are drawn from exactly the intervals of the
     * given track. To get positions outside of a track, pass the inverted track.
     *
     * @param siteCount - count of random positions to be made up
     * @param track - interval by which the in/out check is made
     *
     * @return Collection of random positions; empty if siteCount is not positive or the track has no bases to draw from
     */
    static Collection<Long> randPositions(int siteCount, Track track) {
        List<Long> sites = new ArrayList<>();

        long maxValue = Tracks.sumOfIntervals(track);
        long[] intervalStart = track.getStarts();
        long[] intervalEnd = track.getEnds();

        // Nothing to draw, or nothing to draw from. Without this guard an empty
        // track would run off the end of the interval arrays below.
        if (siteCount <= 0 || maxValue <= 0 || intervalStart.length == 0) {
            return sites;
        }

        MersenneTwister rand = new MersenneTwister();

        //get some random numbers in [0, maxValue)
        List<Long> randomValues = new ArrayList<>(siteCount);
        for (int i = 0; i < siteCount; i++) {
            randomValues.add((long) Math.floor(rand.nextDouble() * maxValue));
        }

        Collections.sort(randomValues); // very important before stretching to the genome!

        //stretch random values to whole genome:
        int j = 0; // index of the interval the current value falls into
        long sumOfPrevious = 0; // remember sum of previous intervals.
        long intervalSize = intervalEnd[j] - intervalStart[j];

        for (long value : randomValues) {
            long r = value - sumOfPrevious; // offset relative to the start of interval j

            // Values are sorted, so j only ever moves forward.
            while (r >= intervalSize) {
                r -= intervalSize;
                sumOfPrevious += intervalSize;
                j++;
                intervalSize = intervalEnd[j] - intervalStart[j];
            }

            sites.add(r + intervalStart[j]);
        }

        return sites;
    }
}