From 23f8513422f2ce38d66a299abe94243513857ec5 Mon Sep 17 00:00:00 2001 From: Clay Birkett Date: Thu, 30 Mar 2017 18:49:35 -0700 Subject: [PATCH] add class for command line call --- gensel.php | 2477 +--------------------------------------------- gensel_class.php | 2458 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2469 insertions(+), 2466 deletions(-) create mode 100644 gensel_class.php diff --git a/gensel.php b/gensel.php index 3a6b1358..dad58c63 100644 --- a/gensel.php +++ b/gensel.php @@ -1,6 +1,6 @@ - * @license http://triticeaetoolbox.org/wheat/docs/LICENSE Berkeley-based - * @link http://triticeaetoolbox.org/wheat/downloads/downloads.php - **/ - -class Downloads -{ - /** - * Delimiter used for output files - */ - public $delimiter = "\t"; - - /** - * Using the class's constructor to decide which action to perform - * - * @param string $function action to perform - */ - public function __construct($function = null) - { - switch ($function) { - case 'genomic_prediction': - $this->genomic_prediction(); - break; - case 'run_histo': - $this->run_histo(); - break; - case 'run_gwa': - $this->run_gwa(); - break; - case 'run_gwa2': - $this->run_gwa2(); - break; - case 'run_rscript': - $this->run_rscript(); - break; - case 'run_rscript2': - $this->run_rscript2(); - break; - case 'download_session_v2': - $this->type1_session(V2); - break; - case 'download_session_v3': - $this->type1_session(V3); - break; - case 'download_session_v4': - $this->type1_session(V4); - break; - case 'refreshtitle': - $this->refreshTitle(); - break; - case 'gwas_status': - $this->status_gwas(); - break; - case 'pred_status': - $this->status_pred(); - break; - case 'filter_lines': - $this->filterLines(); - break; - default: - $this->type1Select(); - break; - } - } - - /** - * Load header and footer then check session to use existing data selection - */ - private function type1Select() - { - global $config; - global $mysqli; - require_once 
$config['root_dir'].'theme/normal_header.php'; - $phenotype = ""; - $lines = ""; - $markers = ""; - $saved_session = ""; - $this->type1Checksession(); - require_once 'downloads/select-map.php'; - require_once $config['root_dir'].'theme/footer.php'; - } - - /** - * Checks the session variable, if there is lines data saved then go directly to the lines menu - */ - private function type1Checksession() - { - global $mysqli; - ?> - - - - -
- refreshTitle(); - if (empty($_SESSION['phenotype'])) { - echo "Select a set of traits and phenotype trials

"; - } elseif (empty($_SESSION['selected_lines'])) { - echo "
Select validation set containing trait measurements to plot prediction vs observed. "; - echo "Wizard
"; - echo "Select prediction set without trait measurements to predict the traits. "; - echo "Lines by Properties, "; - echo "Lines by Genotype Experiment
"; - } elseif (empty($_SESSION['phenotype']) && empty($_SESSION['training_traits'])) { - echo "Please select traits before using this feature.

"; - echo "Select Traits

"; - echo "Wizard (Lines, Traits, Trials)"; - } elseif (empty($_SESSION['selected_map'])) { - if (isset($_SESSION['geno_exps'])) { - $geno_exp = $_SESSION['geno_exps']; - $geno_str = $geno_exp[0]; - $sql = "select marker_uid from allele_bymarker_exp_101 where experiment_uid = $geno_str and pos is not null limit 10"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli) . $sql); - if ($row = mysqli_fetch_array($res)) { - } else { - echo "Select a genetic map."; - echo "
"; - } - } else { - echo "Select a genetic map."; - echo "
"; - } - } - if (!empty($_SESSION['training_lines']) && !empty($_SESSION['selected_lines'])) { - if (empty($_SESSION['selected_trials'])) { - echo "Prediction"; - } else { - echo "Validation"; - $tmp = $_SESSION['selected_trials']; - $e_uid = implode(",", $tmp); - $sql = "select trial_code from experiments where experiment_uid IN ($e_uid)"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli) . $sql); - while ($row = mysqli_fetch_array($res)) { - echo "$row[0]
"; - } - } - - $count = count($_SESSION['selected_lines']); - $markers = $_SESSION['filtered_markers']; - $estimate = count($markers) + count($lines); - echo "$count"; - ?> - -
- - - -
- (Error - $count unique lines in prediction set)"; - } - } - echo ""; - if ($count_dup > 0) { - if (empty($_SESSION['selected_trials'])) { - echo " Warning - $count_dup lines removed from prediction set because they are in training set"; - } else { - echo " Warning - $count_dup lines removed from validation set because they are in training set"; - } - } - $min_maf = 5; - $max_missing = 10; - $max_miss_line = 10; - $unique_str = chr(rand(65, 80)).chr(rand(65, 80)).chr(rand(65, 80)).chr(rand(65, 80)); - ?> -
- -

Minimum MAF ≥ % -      - Remove markers missing > % of data -      - Remove lines missing > % of data -

-
-
-
- - -
- -

-
-
-
-
- - "; - } - echo "
"; - } - - /** - * filters markers and lines based on settings - */ - private function filterLines() - { - if (isset($_GET['maf'])) { - $min_maf = $_GET['maf']; - } else { - $min_maf = 5; - } - if (isset($_GET['mmm'])) { - $max_missing = $_GET['mmm']; - } else { - $max_missing = 10; - } - if (isset($_GET['mml'])) { - $max_miss_line = $_GET['mml']; - } else { - $max_miss_line = 10; - } - $lines = $_SESSION['selected_lines']; - if (isset($_SESSION['training_lines'])) { - $training_lines = $_SESSION['training_lines']; - } else { - $training_lines = ""; - } - if (isset($_SESSION['geno_exps'])) { - $experiment_uid = $_SESSION['geno_exps'][0]; - calculate_afe($experiment_uid, $min_maf, $max_missing, $max_miss_line); - findCommonLines($lines); - } elseif ($training_lines == "") { - calculate_af($lines, $min_maf, $max_missing, $max_miss_line); - } else { - calculate_af($training_lines, $min_maf, $max_missing, $max_miss_line); - } - ?> - - Genomic Association and Prediction"; - if (!empty($_SESSION['training_traits'])) { - $tmp = $_SESSION['training_traits']; - $tmp = $tmp[0]; - $sql = "select phenotypes_name from phenotypes where phenotype_uid = '$tmp'"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $row = mysqli_fetch_array($res); - echo "

Trait: $row[0]

"; - } - if ($command == "save_t") { - if (!empty($_SESSION['selected_traits'])) { - $_SESSION['training_traits'] = $_SESSION['selected_traits']; - $_SESSION['training_trials'] = $_SESSION['selected_trials']; - $_SESSION['training_lines'] = $_SESSION['selected_lines']; - unset($_SESSION['selected_trials']); - unset($_SESSION['selected_lines']); - unset($_SESSION['filtered_lines']); - unset($_SESSION['filtered_markers']); - unset($_SESSION['clicked_buttons']); - } else { - echo "error - no selection found"; - } - } elseif ($command == "save_p") { - $_SESSION['predict_traits'] = $_SESSION['selected_traits']; - $_SESSION['predict_trials'] = $_SESSION['selected_trials']; - $_SESSION['predict_lines'] = $_SESSION['selected_lines']; - } elseif ($command == "clear") { - unset($_SESSION['selected_traits']); - unset($_SESSION['selected_trials']); - unset($_SESSION['selected_lines']); - unset($_SESSION['training_traits']); - unset($_SESSION['training_trials']); - unset($_SESSION['training_lines']); - unset($_SESSION['filtered_lines']); - unset($_SESSION['phenotype']); - } elseif ($command== "clear_p") { - unset($_SESSION['selected_traits']); - unset($_SESSION['selected_trials']); - unset($_SESSION['selected_lines']); - } - if (empty($_SESSION['selected_lines']) || empty($_SESSION['training_lines'])) { - ?> - -
Genome Wide Association (consensus genotype)
- 1. Select a set of lines, trait, and trials (one trait).
- 2. Select the genetic map which has the best coverage for this set.
- 3. Return to this page and select model options then GWAS Analysis
- -
Genome Wide Association (single genotype experiment)
- 1. Select a set of lines by genotype experiment.
- 2. Select a trait and phenotype trial.
- 3. Select the genetic map which has the best coverage for this set.
- 4. Return to this page and select model options then GWAS Analysis
- -
Genomic Prediction
- 1. Select a set of lines, trait, and trials (one trait).
- 2. Return to this page and select G-BLUP Analysis for cross-validation of the training set. Then save Training Set.
- 3. To select a validation set, select a new set of lines using a different trial, then return to this page for analysis.
- 4. To select a prediction set, select a new set of lines without phenotype measurements, then return to this page for analysis.
-
- -

Additional notes on GWAS and G-BLUP methods
- "; - echo "SetTrialsLines"; - $p_uid = $_SESSION['training_traits']; - $p_uid = $p_uid[0]; - $sql = "select phenotypes_name from phenotypes where phenotype_uid = $p_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $row = mysqli_fetch_array($res); - echo "Training"; - if (!empty($_SESSION['training_trials'])) { - $tmp = $_SESSION['training_trials']; - $e_uid = implode(",",$tmp); - $sql = "select trial_code from experiments where experiment_uid IN ($e_uid)"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while ($row = mysqli_fetch_array($res)) { - echo "$row[0]
"; - } - } - echo ""; - if (count($_SESSION['training_lines']) > 0) { - $selectedlines = implode(",", $_SESSION['training_lines']); - $sql_option = " AND lr.line_record_uid IN ($selectedlines)"; - } else { - $sql_option = ""; - } - $sql = "SELECT count(DISTINCT lr.line_record_uid) - FROM tht_base as tb, phenotype_data as pd, phenotypes as p, line_records as lr - WHERE pd.tht_base_uid = tb.tht_base_uid - $sql_option - AND p.phenotype_uid = pd.phenotype_uid - AND lr.line_record_uid = tb.line_record_uid - AND pd.phenotype_uid = $p_uid - AND tb.experiment_uid IN ($e_uid)"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $row = mysqli_fetch_array($res); - echo "$row[0]"; - ?> - -

- - - -
- "; - } - } elseif (!empty($_SESSION['phenotype']) && !empty($_SESSION['selected_trials']) ) { - ?> - -
TraitsTrialsLinesGenetic Map -
- "; - } - echo ""; - $tmp = $_SESSION['selected_trials']; - $e_uid = implode(",",$tmp); - $sql = "select trial_code from experiments where experiment_uid IN ($e_uid)"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while ($row = mysqli_fetch_array($res)) { - echo "$row[0]
"; - } - echo "
"; - $count = count($_SESSION['selected_lines']); - echo "$count"; - if (isset($_SESSION['geno_exps'])) { - $geno_exp = $_SESSION['geno_exps']; - $geno_str = $geno_exp[0]; - $sql = "select marker_uid from allele_bymarker_exp_101 where experiment_uid = $geno_str and pos is not null limit 10"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - if ($row = mysqli_fetch_array($res)) { - $sql = "select trial_code from experiments where experiment_uid = $geno_str"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $row = mysqli_fetch_array($res); - $name = $row[0]; - echo "using map from genotype experiment
$name"; - } elseif (isset($_SESSION['selected_map'])) { - $sql = "select mapset_name from mapset where mapset_uid = $map"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $row = mysqli_fetch_assoc($res); - $map_name = $row['mapset_name']; - echo "$map_name"; - } - } elseif (isset($_SESSION['selected_map'])) { - $sql = "select mapset_name from mapset where mapset_uid = $map"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $row = mysqli_fetch_assoc($res); - $map_name = $row['mapset_name']; - echo "$map_name"; - } - echo "
"; - if ($count < 10) { - echo "Warning: analysis may fail with only $count lines selected"; - } - $min_maf = 5; - $max_missing = 10; - $max_miss_line = 10; - $lines = $_SESSION['selected_lines']; - $count_markers = calculate_db($lines, $min_maf, $max_missing, $max_miss_line); - $count_lines = count($lines); - $estimate = ($count_markers * $count_lines) / 10000; - if ($count > 0) { - ?> -
Minimum MAF ≥
- Remove markers missing > % of data
- - Remove lines missing > % of data
- - - - Remove trait outliers - -
-
- -
-
-
-
- - -
- GWAS - principal components - -
method - EMMAX (faster but can underestimate significance)
- EMMA with REML -
G-BLUP - -

-
- - - - then continue to select prediction set -
- -
-
-
-
- - Warning, not a valid combination of traits, trials, and lines"; - } - } - ?> -

- -

Genomic Selection

- - 3) { - $histo_width = 800 + ($ntrials - 3) * 200; - } - - if(!file_exists($dir.$filename3)){ - $h = fopen($dir.$filename3, "w+"); - $png = "png(\"$dir$filename4\", width=$histo_width, height=300)\n"; - $cmd1 = "phenoData <- as.matrix(read.delim(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=1))\n"; - $cmd1 = "phenoData <- read.table(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=NULL)\n"; - $cmd2 = "phenolabel <- \"$phenolabel\"\n"; - $cmd3 = "phenounit <- \"$phenounit\"\n"; - $cmd4 = $triallabel; - fwrite($h, $png); - fwrite($h, $cmd1); - fwrite($h, $cmd2); - fwrite($h, $cmd3); - fwrite($h, $cmd4); - fclose($h); - } - exec("cat /tmp/tht/$filename3 R/GShisto.R | R --vanilla > /dev/null 2> /tmp/tht/$filename5"); - if (file_exists("/tmp/tht/$filename5")) { - $h = fopen("/tmp/tht/$filename5", "r"); - while ($line=fgets($h)) { - echo "$line
\n"; - } - fclose($h); - } - if (file_exists("/tmp/tht/$filename4")) { - print "
"; - } else { - echo "Error in R script R/GShisto.R
\n"; - } - } - - private function display_gwas_hits($h) { - global $mysqli; - echo "Top five marker scores from GWAS analysis
"; - echo "
markerchromposvalueexternal link (resource name)"; - $line= fgetcsv($h); - while ($line= fgetcsv($h)) { - $link = ""; - $sql = "select value, name_annotation, linkout_string_for_annotation - from markers, marker_annotations, marker_annotation_types - where markers.marker_uid = marker_annotations.marker_uid - and marker_annotations.marker_annotation_type_uid = marker_annotation_types.marker_annotation_type_uid - and marker_name = \"$line[1]\""; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while ($row = mysqli_fetch_assoc($res)) { - $reg_pattern = '/XXXX/'; - $replace_string = $row['value']; - $name = $row['name_annotation']; - $source_string = $row['linkout_string_for_annotation']; - $linkString = preg_replace($reg_pattern, $replace_string, $source_string); - if ($link == "") { - if ($linkString != "") { - $link = "$replace_string ($name)"; - } - } else { - if ($linkString != "") { - $link .= "
$replace_string ($name)"; - } - } - } - if ($count < 5) { - $markerlink = "$line[1]"; - echo "
$markerlink$line[2]$line[3]$line[4]$link\n"; - } - $count++; - } - fclose($h); - echo "
"; - } - - /** - * display gwas results - */ - private function status_gwas() { - $unique_str = $_GET['unq']; - $dir = '/tmp/tht/'; - $found = 1; - $filename9 = 'THTdownload_hmp_' . $unique_str. '.txt'; - $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; - $filename3 = 'THTdownload_gwa_' . $unique_str . '.R'; - $filename4 = 'THTdownload_gwa1_' . $unique_str . '.png'; - $filename7 = 'THTdownload_gwa2_' . $unique_str . '.png'; - $filename10 = 'THTdownload_gwa3_' . $unique_str . '.png'; - $filename5 = 'process_error_gwa_' . $unique_str . '.txt'; - $filename6 = 'R_error_gwa_' . $unique_str . '.txt'; - $filename1 = 'THT_result_' . $unique_str . '.csv'; - $filenameK = 'Kinship_matrix_' . $unique_str . '.csv'; - if (file_exists("/tmp/tht/$filename7")) { - } else { - //echo "$filename7 not ready
\n"; - $found = 0; - } - if (file_exists("/tmp/tht/$filename10")) { - } else { - //echo "$filename10 not ready
\n"; - $found = 0; - } - if (file_exists("/tmp/tht/$filename4")) { - } else { - //echo "$filename4 not ready
\n"; - $found = 0; - } - if (file_exists("/tmp/tht/$filename5")) { - $h = fopen("/tmp/tht/$filename5", "r"); - while ($line=fgets($h)) { - echo "$line
\n"; - } - fclose($h); - } - if (file_exists("/tmp/tht/$filename3")) { - // Extract the Trait name from the .R file. - $h = fopen("/tmp/tht/$filename3", "r"); - while ($line=fgets($h)) { - if (strpos($line, 'phenolabel') !== FALSE) { - $traitname = preg_replace('/phenolabel <- "(.*)"/', '$1', $line); - } - } - fclose($h); - } - if ($found) { - print "
"; - print "
"; - print "
"; - print "Trait: $traitname

"; - print "Export GWAS results to CSV file "; - print "with columns for marker name, chromosome, position, marker score

"; - print "Export Kinship matrix

"; - $count = 0; - $h = fopen("/tmp/tht/$filename1", "r"); - if ($h) { - $this->display_gwas_hits($h); - } - } else { - if (isset($_SESSION['filtered_ines'])) { - $lines = $_SESSION['filtered_lines']; - } else { - $lines = $_SESSION['selected_lines']; - } - if (isset($_SESSION['filtered_markers'])) { - $markers = $_SESSION['filtered_markers']; - } else { - $markers = $_SESSION['geno_exps_cnt']; - } - $estimate = count($lines) * count($markers); - $estimate = round($estimate/6000000,1); - echo "Results not ready yet. Estimated analysis time is $estimate minutes using default options.
"; - ?> - Select the "Check Results" button to retrieve results.
- -
-
"; - } else { - $found = 0; - } - if (file_exists("/tmp/tht/$filename4")) { - print "
"; - if (isset($_SESSION['selected_trials'])) { - print "Export prediction to CSV file

"; - } else { - print "Cross-validation of training set using 5 folds and 2 repeats.
\n"; - print "Export Cross-validated prediction to CSV file

"; - } - } else { - $found = 0; - } - - if (file_exists("/tmp/tht/$filename5")) { - $h = fopen("/tmp/tht/$filename5", "r"); - while ($line=fgets($h)) { - echo "$line
\n"; - } - fclose($h); - } - if (file_exists("/tmp/tht/$filename6")) { - $h = fopen("/tmp/tht/$filename6", "r"); - while ($line=fgets($h)) { - echo "$line
\n"; - } - fclose($h); - } - if ($found == 0) { - $lines = $_SESSION['filtered_lines']; - $markers = $_SESSION['filtered_markers']; - $estimate = count($lines) + count($markers); - $estimate = round($estimate/700,1); - echo "Results not ready yet. Estimated analysis time is $estimate minutes.
"; - ?> - Select the "Check Results" button to retrieve results.
- -
- /dev/null 2> /tmp/tht/$filename5"); - if (file_exists("/tmp/tht/$filename7")) { - print "
"; - } else { - echo "Error in R script
\n"; - echo "cat /tmp/tht/$filename3 R/GSforT3.R | R --vanilla
"; - } - if (file_exists("/tmp/tht/$filename10")) { - print "
"; - } - if (file_exists("/tmp/tht/$filename4")) { - print "
"; - print "Trait: $phenolabel

"; - print "Export GWAS results to CSV file "; - print "with columns for marker name, chromosome, position, marker score

"; - print "Export Kinship matrix

"; - $count = 0; - $h = fopen("/tmp/tht/$filename1", "r"); - if($h) { - $this->display_gwas_hits($h); - } else { - echo "error - could not open $filename1\n"; - } - } - if (file_exists("/tmp/tht/$filename5")) { - $h = fopen("/tmp/tht/$filename5", "r"); - while ($line=fgets($h)) { - echo "$line
\n"; - } - fclose($h); - } - } - - /** - * run GWAS results in background and notify when complete - */ - private function run_gwa2() { - global $config; - global $mysqli; - $unique_str = $_GET['unq']; - $model_opt = $_GET['fixed2']; - $p3d = $_GET['p3d']; - if (isset($_SESSION['training_traits'])) { - $phenotype = $_SESSION['training_traits']; - $phenotype = $phenotype[0]; - //} elseif (isset($_SESSION['selected_traits'])) { use when multiple traits is working - } elseif (isset($_SESSION['phenotype'])) { - $phenotype = $_SESSION['phenotype']; - } - $sql = "select phenotypes_name, unit_name from phenotypes, units - where phenotypes.unit_uid = units.unit_uid - and phenotype_uid = $phenotype"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $row = mysqli_fetch_array($res); - $phenolabel = $row[0]; - //$unique_fld = chr(rand(65,80)).chr(rand(65,80)).chr(rand(65,80)).chr(rand(65,80)); - //mkdir("/tmp/tht/$unique_fld"); it would be better to put all files in directory - $dir = '/tmp/tht/'; - $filename9 = 'THTdownload_hmp_' . $unique_str. '.txt'; - $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; - $filename3 = 'THTdownload_gwa_' . $unique_str . '.R'; - $filename4 = 'THTdownload_gwa1_' . $unique_str . '.png'; - $filename7 = 'THTdownload_gwa2_' . $unique_str . '.png'; - $filename10 = 'THTdownload_gwa3_' . $unique_str . '.png'; - $filename5 = 'process_error_gwa_' . $unique_str . '.txt'; - $filename6 = 'R_error_gwa_' . $unique_str . '.txt'; - $filename1 = 'THT_result_' . $unique_str . '.csv'; - $filenameK = 'Kinship_matrix_' . $unique_str . 
'.csv'; - if(!file_exists($dir.$filename3)){ - $h = fopen($dir.$filename3, "w+"); - $png1 = "png(\"$dir$filename4\", width=1200, height=400)\n"; - $png2 = "png(\"$dir$filename7\", width=1200, height=400)\n"; - $png3 = "png(\"$dir$filename10\", width=1200, height=400)\n"; - $png4 = "dev.set(3)\n"; - $cmd3 = "phenoData <- read.table(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=NULL)\n"; - $cmd4 = "hmpData <- read.table(\"$dir$filename9\", header=TRUE, stringsAsFactors=FALSE, sep=\"\\t\", check.names = FALSE)\n"; - $cmd5 = "phenolabel <- \"$phenolabel\"\n"; - $cmd6 = "fileerr <- \"$dir$filename6\"\n"; - $cmd7 = "fileout <- \"$filename1\"\n"; - $cmd8 = "model_opt <- \"$model_opt\"\n"; - $cmd9 = "fileK <- \"$filenameK\"\n"; - if (isset($_SESSION['username'])) { - $emailAddr = $_SESSION['username']; - $emailAddr = "email <- \"$emailAddr\"\n"; - fwrite($h, $emailAddr); - $result_url = $config['base_url'] . "gensel.php?function=gwas_status&unq=$unique_str"; - $result_url = "result_url <- \"$result_url\"\n"; - fwrite($h, $result_url); - } - fwrite($h, $png1); - fwrite($h, $png2); - fwrite($h, $png3); - fwrite($h, $png4); - fwrite($h, $cmd3); - fwrite($h, $cmd4); - fwrite($h, $cmd5); - fwrite($h, $cmd6); - fwrite($h, $cmd7); - fwrite($h, $cmd8); - fwrite($h, $cmd9); - fwrite($h, "p3d <- $p3d\n"); - fwrite($h, "setwd(\"/tmp/tht/\")\n"); - fclose($h); - } - exec("cat /tmp/tht/$filename3 R/GSforGWA.R | R --vanilla > /dev/null 2> /tmp/tht/$filename5 &"); - - if (isset($_SESSION['filtered_lines'])) { - $lines = $_SESSION['filtered_lines']; - } else { - $lines = $_SESSION['selected_lines']; - } - if (isset($_SESSION['filtered_markers'])) { - $markers = $_SESSION['filtered_markers']; - } else { - $markers = $_SESSION['geno_exps_cnt']; - } - $estimate = count($lines) * count($markers); - $estimate = round($estimate/600000,1); - echo "Estimated analysis time is $estimate minutes using default options.
"; - $emailAddr = $_SESSION['username']; - if (isset($_SESSION['username'])) { - echo "An email will be sent to $emailAddr when the job is complete
\n"; - } else { - echo "If you Login a notification will be sent upon completion
\n"; - } - ?> - Select the "Check Results" button to retrieve results.
- -
- /dev/null 2> /tmp/tht/$filename5"); - if (file_exists("/tmp/tht/$filename10")) { - print "
"; - } else { - echo "Error in R script
\n"; - echo "cat /tmp/tht/$filename3 R/GSforT3.R | R --vanilla
"; - } - if (file_exists("/tmp/tht/$filename4")) { - print "
"; - //if (isset($_SESSION['selected_traits'])) { use when multiple traits is supported - if (isset($_SESSION['selected_trials'])) { - print "Export prediction to CSV file

"; - } else { - print "Cross-validation of training set using 5 folds and 2 repeats.
\n"; - print "Export Cross-validated prediction to CSV file

"; - } - } else { - echo "Error in R script
\n"; - echo "cat /tmp/tht/$filename3 R/GSforT3.R | R --vanilla
"; - } - - if (file_exists("/tmp/tht/$filename5")) { - $h = fopen("/tmp/tht/$filename5", "r"); - while ($line=fgets($h)) { - echo "$line
\n"; - } - fclose($h); - } - if (file_exists("/tmp/tht/$filename6")) { - $h = fopen("/tmp/tht/$filename6", "r"); - while ($line=fgets($h)) { - echo "$line
\n"; - } - fclose($h); - } - - } - - /** - * run rrBLUP R script in background and notify when complete - */ - private function run_rscript2() { - $unique_str = $_GET['unq']; - $filename1 = 'THTdownload_hapmap_' . $unique_str . '.txt'; - $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; - $filename3 = 'THTdownload_gensel_' . $unique_str . '.R'; - $filename10 = 'THTdownload_gensel2_' . $unique_str . '.png'; - $filename4 = 'THTdownload_gensel_' . $unique_str . '.png'; - $filename5 = 'THT_process_error_' . $unique_str . '.txt'; - $filename6 = 'THT_R_error_' . $unique_str . '.txt'; - $filename7 = 'THT_result_' . $unique_str . '.csv'; - exec("cat /tmp/tht/$filename3 R/GSforT34.R | R --vanilla > /dev/null 2> /tmp/tht/$filename5 &"); - $lines = $_SESSION['filtered_lines']; - $markers = $_SESSION['filtered_markers']; - $estimate = count($lines) + count($markers); - $estimate = round($estimate/700,1); - echo "Estimated analysis time is $estimate minutes.
"; - $emailAddr = $_SESSION['username']; - if (isset($_SESSION['username'])) { - echo "An email will be sent to $emailAddr when the job is complete
\n"; - } else { - echo "If you Login a notification will be sent upon completion
\n"; - } - ?> - Select the "Check Results" button to retrieve results.
- -
- - - 0) { - $selectedlines = $_SESSION['training_lines']; - foreach ($selectedlines as $uid) { - if (!in_array($uid,$all_lines)) { - $all_lines[] = $uid; - } - } - } - - $dir = '/tmp/tht/'; - $filename1 = 'THTdownload_snp_p_' . $unique_str . '.txt'; - $filename8 = 'THTdownload_snp_t_' . $unique_str . '.txt'; - $filename9 = 'THTdownload_hmp_' . $unique_str . '.txt'; - $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; - $filename3 = 'THTdownload_gensel_' . $unique_str . '.R'; - $filename4 = 'THTdownload_gensel_' . $unique_str . '.png'; - $filename10 = 'THTdownload_gensel2_' . $unique_str . '.png'; - $filename5 = 'THT_process_error_' . $unique_str . '.txt'; - $filename6 = 'THT_R_error_' . $unique_str . '.txt'; - $filename7 = 'THT_result_' . $unique_str . '.csv'; - - //create genotype file for prediction set - if ($version == "V4") { - if ($training_lines == "") { - if(!file_exists($dir.$filename8)){ - $dtype = "qtlminer"; - $h = fopen($dir.$filename8, "w+"); - fwrite($h,$this->type2_build_markers_download($lines,$markers,$dtype)); - fclose($h); - } - } else { - //remove duplicate lines from prediction - foreach ($lines as $key => $value) { - if(in_array($value, $training_lines)){ - unset($lines[$key]); - } - } - if(!file_exists($dir.$filename8)) { - $dtype = "qtlminer"; - $h = fopen($dir.$filename8, "w+"); - fwrite($h,$this->type2_build_markers_download($training_lines,$markers,$dtype)); - fclose($h); - } - if(!file_exists($dir.$filename1)) { - $dtype = "qtlminer"; - $h = fopen($dir.$filename1, "w+"); - fwrite($h,$this->type2_build_markers_download($lines,$markers,$dtype)); - fclose($h); - } - } - } elseif ($version == "V3") { - if (isset($_SESSION['geno_exps'])) { - $experiment = $_SESSION['geno_exps']; - $geno_str = $experiment[0]; - $tmp = count($markers); - if(!file_exists($dir.$filename9)){ - $dtype = "qtlminer"; - $h = fopen($dir.$filename9, "w+"); - $output = type4BuildMarkersDownload($geno_str, $min_maf, $max_missing, $dtype, $h); - fclose($h); - } - 
} elseif ($training_lines == "") { - if(!file_exists($dir.$filename9)){ - $dtype = "qtlminer"; - $h = fopen($dir.$filename9, "w+"); - fwrite($h,$this->type3_build_markers_download($lines,$markers,$dtype)); - fclose($h); - } - } else { - if(!file_exists($dir.$filename9)){ - $dtype = "qtlminer"; - $h = fopen($dir.$filename9, "w+"); - fwrite($h,$this->type3_build_markers_download($training_lines,$markers,$dtype)); - fclose($h); - } - } - } - - if(!file_exists($dir.$filename2)){ - $h = fopen($dir.$filename2, "w+"); - $datasets_exp = ""; - $subset = "yes"; - fwrite($h,$this->type1_build_tassel_traits_download($experiments_t,$phenotype,$datasets_exp,$subset)); - fclose($h); - } - if(!file_exists($dir.$filename3)){ - $h = fopen($dir.$filename3, "w+"); - $png = "png(\"$dir$filename4\", width=900, height=500)\n"; - $png2 = "png(\"$dir$filename10\", width=600, height=500)\n"; - $cmd1 = "snpData_p <- read.table(\"$dir$filename1\", header=TRUE, stringsAsFactors=FALSE, sep=\"\\t\", row.names=1)\n"; - $cmd2 = "snpData_t <- read.table(\"$dir$filename8\", header=TRUE, stringsAsFactors=FALSE, sep=\"\\t\", row.names=1)\n"; - $cmd3 = "phenoData <- read.table(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=NULL)\n"; - $cmd5 = "fileerr <- \"$filename6\"\n"; - $cmd6 = "fileout <- \"$filename7\"\n"; - $cmd7 = "phenolabel <- \"$phenolabel\"\n"; - $cmd8 = "common_code <- \"" . $config['root_dir'] . "R/AmatrixStructure.R\"\n"; - $cmd9 = $triallabel; - if (isset($_SESSION['username'])) { - $emailAddr = $_SESSION['username']; - $emailAddr = "email <- \"$emailAddr\"\n"; - fwrite($h, $emailAddr); - $result_url = $config['base_url'] . 
"gensel.php?function=pred_status&unq=$unique_str"; - $result_url = "result_url <- \"$result_url\"\n"; - fwrite($h, $result_url); - } - - fwrite($h, $png); - fwrite($h, $png2); - if ($training_lines != "") { - fwrite($h, $cmd1); - } - fwrite($h, $cmd2); - fwrite($h, $cmd3); - fwrite($h, $cmd5); - fwrite($h, $cmd6); - fwrite($h, $cmd7); - fwrite($h, $cmd8); - fwrite($h, $cmd9); - fwrite($h, "model <- \"$model_opt\"\n"); - fwrite($h, "setwd(\"/tmp/tht/\")\n"); - fclose($h); - } - - if (($version == "V4") && (isset($_SESSION['training_lines']))) { - if (count($_SESSION['training_lines']) < 50) { - echo "skip CrossValidation because traing set has less than 50 lines
\n"; - } - } - } - - /** - * generate download files in qltminer format - * @param unknown_type $experiments - * @param unknown_type $traits - * @param unknown_type $datasets - */ - function type1_build_traits_download($experiments, $traits, $datasets) - { - global $mysqli; - - $output = 'Experiment' . $this->delimiter . 'Inbred'; - $traits = explode(',', $traits); - - - $select = "SELECT experiments.trial_code, line_records.line_record_name"; - $from = " FROM tht_base - JOIN experiments ON experiments.experiment_uid = tht_base.experiment_uid - JOIN line_records ON line_records.line_record_uid = tht_base.line_record_uid "; - foreach ($traits as $trait) { - $from .= " JOIN ( - SELECT p.phenotypes_name, pd.value, pd.tht_base_uid, pmd.number_replicates, pmd.experiment_uid - FROM phenotypes AS p, phenotype_data AS pd, phenotype_mean_data AS pmd - WHERE pd.phenotype_uid = p.phenotype_uid - AND pmd.phenotype_uid = p.phenotype_uid - AND p.phenotype_uid = ($trait)) AS t$trait - - ON t$trait.tht_base_uid = tht_base.tht_base_uid AND t$trait.experiment_uid = tht_base.experiment_uid"; - $select .= ", t$trait.phenotypes_name as name$trait, t$trait.value as value$trait, t$trait.number_replicates as nreps$trait"; - } - $where = " WHERE tht_base.experiment_uid IN ($experiments) - AND tht_base.check_line = 'no' - AND tht_base.datasets_experiments_uid in ($datasets)"; - - $res = mysqli_query($mysqli, $select.$from.$where) or die(mysqli_error($mysqli)); - - $namevaluekeys = null; - $valuekeys = array(); - while($row = mysqli_fetch_assoc($res)) { - if ($namevaluekeys == null) - { - $namevaluekeys = array_keys($row); - unset($namevaluekeys[array_search('trial_code', $namevaluekeys)]); - //unset($namevaluekeys[array_search('number_replications', $namevaluekeys)]); - unset($namevaluekeys[array_search('line_record_name', $namevaluekeys)]); - - foreach($namevaluekeys as $namevaluekey) { - if (stripos($namevaluekey, 'name') !== FALSE) { - $output .= $this->delimiter . 
"{$row[$namevaluekey]}" . $this->delimiter . "N"; - } else { - array_push($valuekeys, $namevaluekey); - } - } - $output .= "\n"; - } - $output .= "{$row['trial_code']}" . $this->delimiter . "{$row['line_record_name']}"; - foreach($valuekeys as $valuekey) { - if (is_null($row[$valuekey])) - $row[$valuekey] = 'N/A'; - $output .= $this->delimiter . "{$row[$valuekey]}" ; - } - $output .= "\n"; - } - - return $output; - } - - /** - * Build trait download file for Tassel program interface - * @param unknown_type $experiments - * @param unknown_type $traits - * @param unknown_type $datasets - * @param unknown_type $subset - * @return string - * - * modified to work with only one trait - * if trait measured more than once then add AVG() and GROUP by - * for R script the line names have to be quoted or special characters will cause problems - */ - function type1_build_tassel_traits_download($experiments, $traits, $datasets, $subset) { - global $mysqli; - $delimiter = "\t"; - $output = ''; - $outputheader1 = ''; - $outputheader3 = ""; - - //only use first trait - $pattern = "/([0-9]+)/"; - if (preg_match($pattern,$traits,$match)) { - $traits = $match[1]; - } else { - echo "error - can not identify trait $traits\n"; - die(); - } - - if (isset($_SESSION['filtered_lines'])) { - $lines = $_SESSION['filtered_lines']; - } else { - die("Error: should have lines selected
\n"); - } - $selectedlines = implode(",", $lines); - $outputheader2 = "gid" . $delimiter . "pheno" . $delimiter . "trial" . $delimiter . "year"; - - $sql_option = ""; - if ($subset == "yes" && count($_SESSION['filtered_lines']) > 0) { - $selectedlines = implode(",", $_SESSION['filtered_lines']); - $sql_option = " AND lr.line_record_uid IN ($selectedlines)"; - } else { - die("Error: should have lines selected
\n"); - } - if (preg_match("/\d/",$experiments)) { - $sql_option .= "AND tb.experiment_uid IN ($experiments)"; - } - if (preg_match("/\d/",$datasets)) { - $sql_option .= "AND ((tht_base.datasets_experiments_uid in ($datasets) AND tht_base.check_line='no') OR (tht_base.check_line='yes'))"; - } - - // get a list of all line names in the selected datasets and experiments, - // INCLUDING the check lines // AND tht_base.check_line IN ('no') - $sql = "SELECT DISTINCT lr.line_record_name, lr.line_record_uid - FROM line_records as lr, tht_base as tb, phenotype_data as pd - WHERE lr.line_record_uid=tb.line_record_uid - AND pd.tht_base_uid = tb.tht_base_uid - AND pd.phenotype_uid = $traits - $sql_option"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while($row = mysqli_fetch_array($res)) { - $lines_names[] = $row['line_record_name']; - $line_uid[] = $row['line_record_uid']; - } - $nlines = count($lines_names); - //die($sql . "
" . $nlines); - - $outputheader1 = "$nlines".$delimiter."$ncols".$delimiter.$nheaderlines; - $output = $outputheader2."\n"; - - //add lines from pred set - if(isset($_SESSION['training_lines'])) { - if (isset($_SESSION['selected_lines'])){ - $selectedlines = $_SESSION['selected_lines']; - } - if (isset($_SESSION['selected_trials'])) { - $selectedtrials = $_SESSION['selected_trials']; - $selectedtrials = implode(",",$selectedtrials); - } - } else { - $selectedlines = array(); - $selectedtrials = ""; - } - if (preg_match("/\d/",$selectedtrials)) { - $sql_option = " WHERE tb.experiment_uid IN ($selectedtrials) AND "; - } else{ - $sql_option = " WHERE "; - } - foreach ($selectedlines as $uid) { - if (!in_array($uid,$line_uid)) { - $sql = "SELECT line_record_name, tb.experiment_uid, experiment_year as exper - from line_records as lr, tht_base as tb, experiments as exp - $sql_option - lr.line_record_uid=tb.line_record_uid - and tb.experiment_uid = exp.experiment_uid - and lr.line_record_uid = $uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - if (preg_match("/\d/",$selectedtrials)) { //for case where there are phenotype measurements - while ($row = mysqli_fetch_array($res)) { - $line_name = $row[0]; - $exper = $row[1]; - $year = $row[2]; - $sql = "select pd.value as value - from tht_base as tb, phenotype_data as pd - WHERE tb.experiment_uid = $exper AND - tb.line_record_uid = $uid - AND pd.tht_base_uid = tb.tht_base_uid - AND pd.phenotype_uid = $traits"; - $res2 = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - if ($row2 = mysqli_fetch_array($res2)) { - $value = $row2['value']; - } else { - $value = "-999"; - } - - $outline = $line_name.$delimiter.$value.$delimiter.$exper.$delimiter.$year."\n"; - $output .= $outline; - } - } else { //for case where there are no phenotype measurements - if ($row = mysqli_fetch_array($res)) { - $line_name = $row[0]; - $year = $row[2]; - $exper = 0; //use 0 to indicate the prediction set - $value = 
"-999"; - $outline = $line_name.$delimiter.$value.$delimiter.$exper.$delimiter.$year."\n"; - $output .= $outline; - } - } - } else { - //echo "dropped from prediction $uid
\n"; - } - } - - // loop through all the lines in the file - for ($i=0;$i<$nlines;$i++) { - if (preg_match("/\d/",$experiments)) { - $sql_option = " WHERE tb.experiment_uid IN ($experiments) AND "; - } else { - $sql_option = " WHERE "; - } - $sql = "SELECT pd.value as value, pd.phenotype_uid, tb.experiment_uid as exper, experiment_year, tb.tht_base_uid - FROM tht_base as tb, phenotype_data as pd, experiments as exp - $sql_option - tb.line_record_uid = $line_uid[$i] - AND pd.tht_base_uid = tb.tht_base_uid - AND tb.experiment_uid = exp.experiment_uid - AND pd.phenotype_uid = $traits"; - // GROUP BY tb.tht_base_uid, pd.phenotype_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $found = 0; - while ($row = mysqli_fetch_array($res)) { - $found = 1; - $outline = "'$lines_names[$i]'".$delimiter.$row['value'].$delimiter.$row['exper'].$delimiter.$row['experiment_year']."\n"; - if ($removeOutlier == "Y") { - $line = $lines_names[$i]; - $exp = $row['exper']; - if (isset($_SESSION['outliers'][$line][$traits][$exp])) { - echo "skip\n"; - continue; - } - } - $output .= $outline; - } - if ($found == 0) { - $outline = "'$lines_names[$i]'".$delimiter."999".$delimiter."999".$delimiter."999\n"; - $output .= $outline; - } - - } - - return $output; - } - - /** - * Build trait download file for Tassel program interface - * @param string $experiments - * @param unknown_type $traits - * @param unknown_type $lines - * @param unknown_type $subset - * @return string - */ - function type2_build_tassel_traits_download($experiments, $traits, $lines, $subset) - { - global $mysqli; - $delimiter = "\t"; - $output = ''; - $outputheader1 = ''; - $outputheader2 = ''; - $outputheader3 = ''; - - //count number of traits and number of experiments - $ntraits=substr_count($traits, ',')+1; - $nexp=substr_count($experiments, ',')+1; - - // figure out which traits are at which location - if ($experiments=="") { - $sql_option = ""; - } else { - $sql_option = "AND tb.experiment_uid IN 
($experiments)"; - } - - $selectedlines = implode(",", $lines); - $sql_option = $sql_option . " AND tb.line_record_uid IN ($selectedlines)"; - $sql = "SELECT DISTINCT e.trial_code, e.experiment_uid, p.phenotypes_name,p.phenotype_uid - FROM experiments as e, tht_base as tb, phenotype_data as pd, phenotypes as p - WHERE - e.experiment_uid = tb.experiment_uid - $sql_option - AND pd.tht_base_uid = tb.tht_base_uid - AND p.phenotype_uid = pd.phenotype_uid - AND pd.phenotype_uid IN ($traits) - ORDER BY p.phenotype_uid,e.experiment_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $ncols = mysqli_num_rows($res); - while($row = mysqli_fetch_array($res)) { - $outputheader2 .= str_replace(" ","_",$row['phenotypes_name']).$delimiter; - $outputheader3 .= $row['trial_code'].$delimiter; - $keys[] = $row['phenotype_uid'].$row['experiment_uid']; - } - $nexp=$ncols; - - $sql = "SELECT DISTINCT line_records.line_record_name, line_records.line_record_uid - FROM line_records - where line_record_uid IN ($selectedlines)"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while($row = mysqli_fetch_array($res)) { - $lines_names[] = $row['line_record_name']; - $line_uid[] = $row['line_record_uid']; - } - $nlines = count($lines); - //die($sql . "
" . $nlines); - - if ($nexp ===1){ - $nheaderlines = 1; - } else { - $nheaderlines = 2; - } - $outputheader1 = "$nlines".$delimiter."$ncols".$delimiter.$nheaderlines; - //if (DEBUG>1) echo $outputheader1."\n".$outputheader2."\n".$outputheader3."\n"; - // $firephp->log("number traits and lines ".$outputheader1); - if ($nexp ===1){ - $output = $outputheader2."\n"; - } else { - $output = $outputheader3."\n"; - } - - - // loop through all the lines in the file - for ($i=0;$i<$nlines;$i++) { - $outline = $lines_names[$i].$delimiter; - // get selected traits for this line in the selected experiments, change for multiple check lines - /* $sql = "SELECT pd.phenotype_uid, pd.value, tb.experiment_uid - FROM tht_base as tb, phenotype_data as pd - WHERE - tb.line_record_uid = $line_uid[$i] - AND tb.experiment_uid IN ($experiments) - AND pd.tht_base_uid = tb.tht_base_uid - AND pd.phenotype_uid IN ($traits) - ORDER BY pd.phenotype_uid,tb.experiment_uid";*/ - // dem 8oct10: Don't round the data. - // $sql = "SELECT avg(cast(pd.value AS DECIMAL(9,1))) as value,pd.phenotype_uid,tb.experiment_uid - if (preg_match("/\d/",$experiments)) { - $sql_option = " WHERE tb.experiment_uid IN ($experiments) AND "; - } else { - $sql_option = " WHERE "; - } - $sql = "SELECT pd.value as value, pd.phenotype_uid, tb.experiment_uid, tb.tht_base_uid - FROM tht_base as tb, phenotype_data as pd - $sql_option - tb.line_record_uid = $line_uid[$i] - AND pd.tht_base_uid = tb.tht_base_uid - AND pd.phenotype_uid IN ($traits) - GROUP BY tb.tht_base_uid, pd.phenotype_uid"; - //echo "$i $nlines $sql
"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - // // $firephp->log("sql ".$i." ".$sql); - $outarray = array_fill(0,$ncols,-999); - //// $firephp->table('outarray label values', $outarray); - //$outarray = array_fill_keys( $keys , -999); - $outarray = array_combine($keys , $outarray); - //// $firephp->table('outarray label ', $outarray); - while ($row = mysqli_fetch_array($res)) { - $keyval = $row['phenotype_uid'].$row['experiment_uid']; - // $firephp->log("keyvals ".$keyval." ".$row['value']); - $outarray[$keyval]= $row['value']; - } - $outline .= implode($delimiter,$outarray)."\n"; - //// $firephp->log("outputline ".$i." ".$outline); - $output .= $outline; - - } - - return $output; - } - - /** - * build genotype data file for tassle V2 and V3 - * @param unknown_type $experiments - * @param unknown_type $dtype - */ - function type1_build_markers_download($experiments,$dtype) - { - global $mysqli; - $outputheader = ''; - $output = ''; - $doneheader = false; - $delimiter ="\t"; - - if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) - $max_missing = $_GET['mm']; - if ($max_missing>100) - $max_missing = 100; - elseif ($max_missing<0) - $max_missing = 0; - // $firephp->log("in sort markers2"); - $min_maf = 0.01;//IN PERCENT - if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) - $min_maf = $_GET['mmaf']; - if ($min_maf>100) - $min_maf = 100; - elseif ($min_maf<0) - $min_maf = 0; - // $firephp->log("in sort markers".$max_missing." 
".$min_maf); - - //get lines and filter to get a list of markers which meet the criteria selected by the user - $sql_mstat = "SELECT af.marker_uid as marker, m.marker_name as name, SUM(af.aa_cnt) as sumaa, SUM(af.missing)as summis, SUM(af.bb_cnt) as sumbb, - SUM(af.total) as total, SUM(af.ab_cnt) AS sumab - FROM allele_frequencies AS af, markers as m - WHERE m.marker_uid = af.marker_uid - AND af.experiment_uid in ($experiments) - group by af.marker_uid"; - - $res = mysqli_query($myslqi, $sql_mstat) or die(mysqli_error($mysqli)); - $num_maf = $num_miss = 0; - while ($row = mysqli_fetch_array($res)){ - $maf = round(100*min((2*$row["sumaa"]+$row["sumab"])/(2*$row["total"]),($row["sumab"]+2*$row["sumbb"])/(2*$row["total"])),1); - $miss = round(100*$row["summis"]/$row["total"],1); - if (($maf >= $min_maf)AND ($miss<=$max_missing)) { - $marker_names[] = $row["name"]; - $outputheader .= $row["name"].$delimiter; - $marker_uid[] = $row["marker"]; - } - } - $nelem = count($marker_names); - if ($nelem == 0) { - die("error - no genotype or marker data for this experiment, experiment_uid=$experiments"); - } - $marker_uid = implode(",",$marker_uid); - - if ($dtype=='qtlminer') { - $lookup = array( - 'AA' => '1', - 'BB' => '-1', - '--' => 'NA', - 'AB' => '0' - ); - } else { - $lookup = array( - 'AA' => '1:1', - 'BB' => '2:2', - '--' => '?', - 'AB' => '1:2' - ); - } - - // make an empty line with the markers as array keys, set default value - // to the default missing value for either qtlminer or tassel - // places where the lines may have different values - - if ($dtype =='qtlminer') { - $empty = array_combine($marker_names,array_fill(0,$nelem,'NA')); - } else { - $empty = array_combine($marker_names,array_fill(0,$nelem,'?')); - } - - - $sql = "SELECT line_record_name, marker_name AS name, - alleles AS value - FROM - allele_cache as a - WHERE - a.marker_uid IN ($marker_uid) - AND a.experiment_uid IN ($experiments) - ORDER BY a.line_record_uid, a.marker_uid"; - - - $last_line = 
"some really silly name that noone would call a plant"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - - $outarray = $empty; - $cnt = $num_lines = 0; - while ($row = mysqli_fetch_array($res)){ - //first time through loop - if ($cnt==0) { - $last_line = $row['line_record_name']; - } - - if ($last_line != $row['line_record_name']){ - // Close out the last line - $output .= "$last_line\t"; - $outarray = implode($delimiter,$outarray); - $output .= $outarray."\n"; - //reset output arrays for the next line - $outarray = $empty; - $mname = $row['name']; - $outarray[$mname] = $lookup[$row['value']]; - $last_line = $row['line_record_name']; - $num_lines++; - } else { - $mname = $row['name']; - $outarray[$mname] = $lookup[$row['value']]; - } - $cnt++; - } - //NOTE: there is a problem with the last line logic here. Must fix. - //save data from the last line - $output .= "$last_line\t"; - $outarray = implode($delimiter,$outarray); - $output .= $outarray."\n"; - $num_lines++; - - if ($dtype =='qtlminer') { - return $outputheader."\n".$output; - } else { - return $num_lines.$delimiter.$nelem.":2\n".$outputheader."\n".$output; - } - } - - /** - * build file listing conflicts in genotype data - * @param unknown_type $experiments - * @param unknown_type $dtype - */ - function type1_build_conflicts_download($experiments,$dtype) { - global $mysqli; - //get lines and filter to get a list of markers which meet the criteria selected by the user - $sql_mstat = "SELECT af.marker_uid as marker, m.marker_name as name, SUM(af.aa_cnt) as sumaa, SUM(af.missing)as summis, SUM(af.bb_cnt) as sumbb, - SUM(af.total) as total, SUM(af.ab_cnt) AS sumab - FROM allele_frequencies AS af, markers as m - WHERE m.marker_uid = af.marker_uid - AND af.experiment_uid in ($experiments) - group by af.marker_uid"; - - $res = mysqli_query($mysqli, $sql_mstat) or die(mysqli_error($mysqli)); - $num_maf = $num_miss = 0; - while ($row = mysqli_fetch_array($res)){ - $maf = 
round(100*min((2*$row["sumaa"]+$row["sumab"])/(2*$row["total"]),($row["sumab"]+2*$row["sumbb"])/(2*$row["total"])),1); - $miss = round(100*$row["summis"]/$row["total"],1); - if (($maf >= $min_maf)AND ($miss<=$max_missing)) { - $marker_uid[] = $row["marker"]; - } - } - $marker_uid = implode(",",$marker_uid); - $output = "line name\tmarker name\talleles\texperiment\n"; - $query = "select l.line_record_name, m.marker_name, a.alleles, e.trial_code - from allele_conflicts a, line_records l, markers m, experiments e - where a.line_record_uid = l.line_record_uid - and a.marker_uid = m.marker_uid - and a.experiment_uid = e.experiment_uid - and a.alleles != '--' - and a.marker_uid IN ($marker_uid) - order by l.line_record_name, m.marker_name, e.trial_code"; - $res = mysqli_query($mysqli, $query) or die(mysqli_error($mysqli)); - if (mysqli_num_rows($res)>0) { - while ($row = mysqli_fetch_row($res)){ - $output.= "$row[0]\t$row[1]\t$row[2]\t$row[3]\n"; - } - } - return $output; - } - - /** - * build genotype data file when given set of lines and markers - * @param unknown_type $lines - * @param unknown_type $markers - * @param unknown_type $dtype - */ - function type2_build_markers_download($lines,$markers,$dtype) - { - global $mysqli; - $outputheader = ''; - $output = ''; - $doneheader = false; - $delimiter ="\t"; - $max_missing = 10; - $min_maf = 5; - - if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) - $max_missing = $_GET['mm']; - if ($max_missing>100) - $max_missing = 100; - elseif ($max_missing<0) - $max_missing = 0; - // $firephp->log("in sort markers2"); - if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) - $min_maf = $_GET['mmaf']; - if ($min_maf>100) - $min_maf = 100; - elseif ($min_maf<0) - $min_maf = 0; - // $firephp->log("in sort markers".$max_missing." 
".$min_maf); - - if (count($markers)>0) { - $markers_str = implode(",", $markers); - } else { - die("error - markers should be selected before download\n"); - } - if (count($lines)>0) { - $lines_str = implode(",", $lines); - } else { - $lines_str = ""; - die("error - must make line selection first
\n"); - } - - //generate an array of selected markers that can be used with isset statement - foreach ($markers as $temp) { - $marker_lookup[$temp] = 1; - } - - $sql = "select marker_uid, marker_name from allele_byline_idx order by marker_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $i=0; - while ($row = mysqli_fetch_array($res)) { - $marker_list[$i] = $row[0]; - $marker_list_name[$i] = $row[1]; - $i++; - } - - foreach ($marker_list as $i => $marker_id) { - $marker_name = $marker_list_name[$i]; - if (isset($marker_lookup[$marker_id])) { - $marker_names[] = $marker_name; - $outputheader .= $marker_name.$delimiter; - $marker_uid[] = $marker_id; - } - } - - if ($dtype=='qtlminer') { - $lookup = array( - 'AA' => '1', - 'BB' => '-1', - '--' => 'NA', - 'AB' => '0', - '' => 'NA' - ); - } else { - $lookup = array( - 'AA' => '1:1', - 'BB' => '2:2', - '--' => '?', - 'AB' => '1:2', - '' => '?' - ); - } - - foreach ($lines as $line_record_uid) { - $sql = "select line_record_name, alleles from allele_byline where line_record_uid = $line_record_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - if ($row = mysqli_fetch_array($res)) { - $outarray2 = array(); - $outarray2[] = $row[0]; - $alleles = $row[1]; - $outarray = explode(',',$alleles); - $i=0; - foreach ($outarray as $allele) { - $marker_id = $marker_list[$i]; - if (isset($marker_lookup[$marker_id])) { - $outarray2[]=$lookup[$allele]; - } - $i++; - } - } else { - $sql = "select line_record_name from line_records where line_record_uid = $line_record_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - if ($row = mysqli_fetch_array($res)) { - $outarray2 = array(); - $outarray2[] = $row[0]; - $i=0; - foreach ($marker_list as $marker_id) { - if (isset($marker_lookup[$marker_id])) { - $outarray2[]=$lookup[""]; - } - $i++; - } - } else { - die("error - could not find uid\n"); - } - } - $outarray = implode($delimiter,$outarray2); - $output .= $outarray . 
"\n"; - } - $nelem = count($marker_names); - $num_lines = count($lines); - if ($nelem == 0) { - die("error - no genotype or marker data for this selection"); - } - - // make an empty line with the markers as array keys, set default value - // to the default missing value for either qtlminer or tassel - // places where the lines may have different values - - if ($dtype =='qtlminer') { - $empty = array_combine($marker_names,array_fill(0,$nelem,'NA')); - } else { - $empty = array_combine($marker_names,array_fill(0,$nelem,'?')); - } - - if ($dtype =='qtlminer') { - return $outputheader."\n".$output; - } else { - return $num_lines.$delimiter.$nelem.":2\n".$outputheader."\n".$output; - } - } - - /** - * build genotype data files for tassel V4 - * @param unknown_type $lines - * @param unknown_type $markers - * @param unknown_type $dtype - */ - function type3_build_markers_download($lines,$markers,$dtype) - { - global $mysqli; - $output = ''; - $outputheader = ''; - $delimiter ="\t"; - - if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) { - $max_missing = $_GET['mm']; - } - if ($max_missing>100) { - $max_missing = 100; - } elseif ($max_missing<0) { - $max_missing = 0; - } - $min_maf = 0.01;//IN PERCENT - if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) { - $min_maf = $_GET['mmaf']; - } - if ($min_maf>100) { - $min_maf = 100; - } elseif ($min_maf<0) { - $min_maf = 0; - } - if (isset($_SESSION['selected_map'])) { - $selected_map = $_SESSION['selected_map']; - } else { - $selected_map = 1; - } - - if (count($markers)>0) { - $markers_str = implode(",", $markers); - } else { - die("Error - markers should be selected before analysis"); - } - if (count($lines)>0) { - $lines_str = implode(",", $lines); - } else { - die("Error - lines should be selected before analysis"); - } - - //generate an array of selected lines that can be used with isset statement - foreach ($lines as $temp) { - $line_lookup[$temp] = 1; - } - - $sql = 
"select line_record_uid, line_record_name from allele_bymarker_idx order by line_record_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $i=0; - while ($row = mysqli_fetch_array($res)) { - $line_list[$i] = $row[0]; - $line_list_name[$i] = $row[1]; - $i++; - } - - //order the markers by map location - $sql = "select markers.marker_uid, markers.marker_name, mim.chromosome, mim.start_position from markers, markers_in_maps as mim, map, mapset - where markers.marker_uid IN ($markers_str) - AND mim.marker_uid = markers.marker_uid - AND mim.map_uid = map.map_uid - AND map.mapset_uid = mapset.mapset_uid - AND mapset.mapset_uid = $selected_map - order by mim.chromosome, CAST(1000*mim.start_position as UNSIGNED), BINARY markers.marker_name"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while ($row = mysqli_fetch_array($res)) { - $marker_uid = $row[0]; - $chr = $row[2]; - $pos = $row[3]; - $marker_list_mapped[$marker_uid] = $pos; - $marker_list_chr[$marker_uid] = $chr; - } - - $marker_list_all = $marker_list_mapped; - //generate an array of selected markers and add map position if available - $sql = "select marker_uid, marker_name, A_allele, B_allele, marker_type_name from markers, marker_types - where marker_uid IN ($markers_str) - AND markers.marker_type_uid = marker_types.marker_type_uid - order by BINARY marker_name"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while ($row = mysqli_fetch_array($res)) { - $marker_uid = $row[0]; - $marker_name = $row[1]; - if (isset($marker_list_all[$marker_uid])) { - } else { - $marker_list_all[$marker_uid] = 0; - } - if (preg_match("/[A-Z]/",$row[2]) && preg_match("/[A-Z]/",$row[3])) { - $allele = $row[2] . "/" . $row[3]; - } elseif (preg_match("/DArT/",$row[4])) { - $allele = $row[2] . "/" . 
$row[3]; - } else { - $allele = "N/N"; - } - $marker_list_name[$marker_uid] = $marker_name; - $marker_list_allele[$marker_uid] = $allele; - } - - //get location in allele_byline for each marker - $sql = "select marker_uid, marker_name from allele_byline_idx order by marker_uid"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - $i=0; - while ($row = mysqli_fetch_array($res)) { - $marker_idx_list[$row[0]] = $i; - $i++; - } - - //get header - $empty = array(); - $outputheader = "rs\talleles\tchrom\tpos"; - $sql = "select line_record_name from line_records where line_record_uid IN ($lines_str)"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while ($row = mysqli_fetch_array($res)) { - $name = $row[0]; - $outputheader .= "\t$name"; - $empty[$name] = "NN"; - } - - //using a subset of markers so we have to translate into correct index - //if there is no map then use chromosome 0 and index for position - $pos_index = 0; - foreach ($marker_list_all as $marker_id => $val) { - $marker_idx = $marker_idx_list[$marker_id]; - $marker_name = $marker_list_name[$marker_id]; - $allele = $marker_list_allele[$marker_id]; - - $lookup = array( - 'AA' => 1, - 'BB' => -1, - '--' => 'NA', - 'AB' => 0, - 'BA' => 0, - '' => 'NA' - ); - - $sql = "select A_allele, B_allele, mim.chromosome, mim.start_position from markers, markers_in_maps as mim, map, mapset where markers.marker_uid = $marker_id - AND mim.marker_uid = markers.marker_uid - AND mim.map_uid = map.map_uid - AND map.mapset_uid = mapset.mapset_uid - AND mapset.mapset_uid = $selected_map"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - if ($row = mysqli_fetch_array($res)) { - $chrom = $row[2]; - if (preg_match('/[0-9]+/',$chrom, $match)) { - $pos = 100 * $row[3]; - } else { - $chrom = 0; - $pos = $pos_index; - $pos_index += 10; - } - } else { - $chrom = 0; - $pos = $pos_index; - $pos_index += 10; - } - $outarray2 = array(); - $sql = "select marker_name, alleles from 
allele_bymarker where marker_uid = $marker_id"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - if ($row = mysqli_fetch_array($res)) { - $alleles = $row[1]; - $outarray = explode(',',$alleles); - foreach ($outarray as $key=>$allele) { - $line_id = $line_list[$key]; - if (isset($line_lookup[$line_id])) { - $outarray2[]=$lookup[$allele]; - } - } - $allele_str = implode("\t",$outarray2); - $output .= "$marker_name\t$allele\t$chrom\t$pos"; - $output .= "\t$allele_str\n"; - } else { - echo "Error - could not find marker_uid $marker_id
\n"; - } - } - return $outputheader."\n".$output; - } - - /** - * build genotype conflicts file when given set of lines and markers - * @param unknown_type $lines - * @param unknown_type $markers - * @return string - */ - function type2_build_conflicts_download($lines,$markers) { - global $mysqli; - - if (count($markers)>0) { - $markers_str = implode(",",$markers); - } else { - $markers_str = ""; - } - if (count($lines)>0) { - $lines_str = implode(",",$lines); - } else { - $lines_str = ""; - } - //get lines and filter to get a list of markers which meet the criteria selected by the user - if (preg_match('/[0-9]/',$markers_str)) { - } else { - //get genotype markers that correspond with the selected lines - $sql_exp = "SELECT DISTINCT marker_uid FROM allele_cache - WHERE - allele_cache.line_record_uid in ($lines_str)"; - $res = mysqli_query($mysqli, $sql_exp) or die(mysql_error($mysqli)); - if (mysqli_num_rows($res)>0) { - while ($row = mysqli_fetch_array($res)){ - $markers[] = $row["marker_uid"]; - } - } - $markers_str = implode(',',$markers); - } - $output = "line name\tmarker name\talleles\texperiment\n"; - $query = "select l.line_record_name, m.marker_name, a.alleles, e.trial_code - from allele_conflicts a, line_records l, markers m, experiments e - where a.line_record_uid = l.line_record_uid - and a.marker_uid = m.marker_uid - and a.experiment_uid = e.experiment_uid - and a.alleles != '--' - and a.line_record_uid IN ($lines_str) - and a.marker_uid IN ($markers_str) - order by l.line_record_name, m.marker_name, e.trial_code"; - $res = mysqli_query($mysqli, $query) or die(mysqli_error($mysqli)); - if (mysqli_num_rows($res)>0) { - while ($row = mysqli_fetch_row($res)){ - $output.= "$row[0]\t$row[1]\t$row[2]\t$row[3]\n"; - } - } - return $output; - } - - /** - * create map file in Tassel V2 format - * @param string $experiments - * @return string - */ - function type1_build_annotated_align($experiments) - { - global $mysqli; - $delimiter ="\t"; - $output = ''; - 
$doneheader = false; - if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) - $max_missing = $_GET['mm']; - if ($max_missing>100) - $max_missing = 100; - elseif ($max_missing<0) - $max_missing = 0; - // $firephp->log("in sort markers2"); - $min_maf = 0.01;//IN PERCENT - if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) - $min_maf = $_GET['mmaf']; - if ($min_maf>100) - $min_maf = 100; - elseif ($min_maf<0) - $min_maf = 0; - // $firephp->log("in sort markers".$max_missing." ".$min_maf); - - //get lines and filter to get a list of markers which meet the criteria selected by the user - $sql_mstat = "SELECT af.marker_uid as marker, m.marker_name as name, SUM(af.aa_cnt) as sumaa, SUM(af.missing)as summis, SUM(af.bb_cnt) as sumbb, - SUM(af.total) as total, SUM(af.ab_cnt) AS sumab - FROM allele_frequencies AS af, markers as m - WHERE m.marker_uid = af.marker_uid - AND af.experiment_uid in ($experiments) - group by af.marker_uid"; - - $res = mysqli_query($mysqli, $sql_mstat) or die(mysqli_error($mysqli)); - $num_maf = $num_miss = 0; - - while ($row = mysqli_fetch_array($res)){ - $maf = round(100*min((2*$row["sumaa"]+$row["sumab"])/(2*$row["total"]),($row["sumab"]+2*$row["sumbb"])/(2*$row["total"])),1); - $miss = round(100*$row["summis"]/$row["total"],1); - if (($maf >= $min_maf)AND ($miss<=$max_missing)) { - $marker_names[] = $row["name"]; - $outputheader .= $delimiter.$row["name"]; - $marker_uid[] = $row["marker"]; - - } - } - // $firephp->log($marker_uid); - - $lookup = array( - 'AA' => 'A','BB' => 'B','--' => '-','AB' => 'C' - ); - $lookup_chrom = array( - '1H' => '1','2H' => '2','3H' => '3','4H' => '4','5H' => '5', - '6H' => '6','7H' => '7','UNK' => '10' - ); - - // finish writing file header using a list of line names - $sql = "SELECT DISTINCT lr.line_record_name AS line_name - FROM line_records AS lr, tht_base AS tb - WHERE - lr.line_record_uid = tb.line_record_uid - AND tb.experiment_uid IN ($experiments) - ORDER 
BY line_name"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - while ($row = mysqli_fetch_array($res)) { - $line_names[] = $row['line_name']; - } - - // make an empty marker with the lines as array keys - $nelem = count($marker_uid); - $n_lines = count($line_names); - $empty = array_combine($line_names,array_fill(0,$n_lines,'-')); - $nemp = count($empty); - $marker_uid = implode(",",$marker_uid); - $line_str = implode($delimiter,$line_names); - // $firephp = log($nelem." ".$n_lines); - - // write output file header - $outputheader = "\n".$delimiter."Yes\n"; - $outputheader .= "".$delimiter.$n_lines."\n"; - $outputheader .= "".$delimiter.$nelem."\n"; - $outputheader .= "".$delimiter."Catagorical\n"; - $outputheader .= "".$delimiter."No\n"; - $outputheader .= "".$delimiter.$line_str."\n"; - $outputheader .= "".$delimiter."".$delimiter."".$delimiter."\n"; - // $firephp = log($outputheader); - - // get marker map data, line and marker names; use latest consensus map - // as the map default - $mapset = 1; - $sql = "SELECT mim.chromosome, mim.start_position, lr.line_record_name as lname, m.marker_name AS mname, - CONCAT(a.allele_1,a.allele_2) AS value - FROM - markers as m, - markers_in_maps as mim, - map, - mapset, - line_records as lr, - alleles as a, - tht_base as tb, - genotyping_data as gd - WHERE - a.genotyping_data_uid = gd.genotyping_data_uid - AND mim.marker_uid = m.marker_uid - AND m.marker_uid = gd.marker_uid - AND gd.marker_uid IN ($marker_uid) - AND mim.map_uid = map.map_uid - AND map.mapset_uid = mapset.mapset_uid - AND mapset.mapset_uid = '$mapset' - AND tb.line_record_uid = lr.line_record_uid - AND gd.tht_base_uid = tb.tht_base_uid - AND tb.experiment_uid IN ($experiments) - ORDER BY mim.chromosome,mim.start_position, m.marker_uid, lname"; - - - $last_marker = "somemarkername"; - $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); - - $outarray = $empty; - $cnt = $num_markers = 0; - while ($row = 
mysqli_fetch_array($mysqli, $res)){ - //first time through loop - if ($cnt==0) { - $last_marker = $row['mname']; - $pos = $row['start_position']; - $chrom = $lookup_chrom[$row['chromosome']]; - } - - if ($last_marker != $row['mname']){ - // Close out the last marker - $output .= "$chrom\t$pos\t$last_marker\t"; - $outarray = implode("",$outarray); - $output .= $outarray."\n"; - //reset output arrays for the next line - $outarray = $empty; - $lname = $row['lname']; //start new line - $outarray[$lname] = $lookup[$row['value']]; - $last_marker = $row['mname']; - $pos = $row['start_position']; - $chrom = $lookup_chrom[$row['chromosome']]; - $num_markers++; - } else { - $lname = $row['lname']; - $outarray[$lname] = $lookup[$row['value']]; - } - $cnt++; - } - - //save data from the last line - $output .= "$chrom\t$pos\t$last_marker\t"; - $outarray = implode("",$outarray); - $output .= $outarray."\n"; - $num_markers++; - - return $outputheader.$output; - } -}// end class +if (isset($_GET['function'])) { + new Downloads($_GET['function']); +} else { + new Downloads('web'); +} diff --git a/gensel_class.php b/gensel_class.php new file mode 100644 index 00000000..f3d99e9c --- /dev/null +++ b/gensel_class.php @@ -0,0 +1,2458 @@ + + * @license http://triticeaetoolbox.org/wheat/docs/LICENSE Berkeley-based + * @link http://triticeaetoolbox.org/wheat/downloads/downloads.php + **/ + +class Downloads +{ + /** + * Delimiter used for output files + */ + public $delimiter = "\t"; + + /** + * Using the class's constructor to decide which action to perform + * + * @param string $function action to perform + */ + public function __construct($function = null) + { + switch ($function) { + case 'genomic_prediction': + $this->genomic_prediction(); + break; + case 'run_histo': + $this->run_histo(); + break; + case 'run_gwa': + $this->run_gwa(); + break; + case 'run_gwa2': + $this->run_gwa2(); + break; + case 'run_rscript': + $this->run_rscript(); + break; + case 'run_rscript2': + 
$this->run_rscript2(); + break; + case 'download_session_v2': + $this->type1_session(V2); + break; + case 'download_session_v3': + $this->type1_session(V3); + break; + case 'download_session_v4': + $this->type1_session(V4); + break; + case 'refreshtitle': + $this->refreshTitle(); + break; + case 'gwas_status': + $this->status_gwas(); + break; + case 'pred_status': + $this->status_pred(); + break; + case 'filter_lines': + $this->filterLines(); + break; + case 'web': + $this->type1Select(); + break; + } + } + + /** + * Load header and footer then check session to use existing data selection + */ + private function type1Select() + { + global $config; + global $mysqli; + require_once $config['root_dir'].'theme/normal_header.php'; + $phenotype = ""; + $lines = ""; + $markers = ""; + $saved_session = ""; + $this->type1Checksession(); + require_once 'downloads/select-map.php'; + require_once $config['root_dir'].'theme/footer.php'; + } + + /** + * Checks the session variable, if there is lines data saved then go directly to the lines menu + */ + private function type1Checksession() + { + global $mysqli; + ?> + + + + +

+ refreshTitle(); + if (empty($_SESSION['phenotype'])) { + echo "Select a set of traits and phenotype trials

"; + } elseif (empty($_SESSION['selected_lines'])) { + echo "
Select validation set containing trait measurements to plot prediction vs observed. "; + echo "Wizard
"; + echo "Select prediction set without trait measurements to predict the traits. "; + echo "Lines by Properties, "; + echo "Lines by Genotype Experiment
"; + } elseif (empty($_SESSION['phenotype']) && empty($_SESSION['training_traits'])) { + echo "Please select traits before using this feature.

"; + echo "Select Traits

"; + echo "Wizard (Lines, Traits, Trials)"; + } elseif (empty($_SESSION['selected_map'])) { + if (isset($_SESSION['geno_exps'])) { + $geno_exp = $_SESSION['geno_exps']; + $geno_str = $geno_exp[0]; + $sql = "select marker_uid from allele_bymarker_exp_101 where experiment_uid = $geno_str and pos is not null limit 10"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli) . $sql); + if ($row = mysqli_fetch_array($res)) { + } else { + echo "Select a genetic map."; + echo "
"; + } + } else { + echo "Select a genetic map."; + echo "
"; + } + } + if (!empty($_SESSION['training_lines']) && !empty($_SESSION['selected_lines'])) { + if (empty($_SESSION['selected_trials'])) { + echo "Prediction"; + } else { + echo "Validation"; + $tmp = $_SESSION['selected_trials']; + $e_uid = implode(",", $tmp); + $sql = "select trial_code from experiments where experiment_uid IN ($e_uid)"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli) . $sql); + while ($row = mysqli_fetch_array($res)) { + echo "$row[0]
"; + } + } + + $count = count($_SESSION['selected_lines']); + $markers = $_SESSION['filtered_markers']; + $estimate = count($markers) + count($lines); + echo "$count"; + ?> + +
+ + + +
+ (Error - $count unique lines in prediction set)"; + } + } + echo ""; + if ($count_dup > 0) { + if (empty($_SESSION['selected_trials'])) { + echo " Warning - $count_dup lines removed from prediction set because they are in training set"; + } else { + echo " Warning - $count_dup lines removed from validation set because they are in training set"; + } + } + $min_maf = 5; + $max_missing = 10; + $max_miss_line = 10; + $unique_str = chr(rand(65, 80)).chr(rand(65, 80)).chr(rand(65, 80)).chr(rand(65, 80)); + ?> +
+ +

Minimum MAF ≥ % +      + Remove markers missing > % of data +      + Remove lines missing > % of data +

+
+
+
+ + +
+ +

+
+
+
+
+ + "; + } + echo "
"; + } + + /** + * filters markers and lines based on settings + */ + private function filterLines() + { + if (isset($_GET['maf'])) { + $min_maf = $_GET['maf']; + } else { + $min_maf = 5; + } + if (isset($_GET['mmm'])) { + $max_missing = $_GET['mmm']; + } else { + $max_missing = 10; + } + if (isset($_GET['mml'])) { + $max_miss_line = $_GET['mml']; + } else { + $max_miss_line = 10; + } + $lines = $_SESSION['selected_lines']; + if (isset($_SESSION['training_lines'])) { + $training_lines = $_SESSION['training_lines']; + } else { + $training_lines = ""; + } + if (isset($_SESSION['geno_exps'])) { + $experiment_uid = $_SESSION['geno_exps'][0]; + calculate_afe($experiment_uid, $min_maf, $max_missing, $max_miss_line); + findCommonLines($lines); + } elseif ($training_lines == "") { + calculate_af($lines, $min_maf, $max_missing, $max_miss_line); + } else { + calculate_af($training_lines, $min_maf, $max_missing, $max_miss_line); + } + ?> + + Genomic Association and Prediction"; + if (!empty($_SESSION['training_traits'])) { + $tmp = $_SESSION['training_traits']; + $tmp = $tmp[0]; + $sql = "select phenotypes_name from phenotypes where phenotype_uid = '$tmp'"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $row = mysqli_fetch_array($res); + echo "

Trait: $row[0]

"; + } + if ($command == "save_t") { + if (!empty($_SESSION['selected_traits'])) { + $_SESSION['training_traits'] = $_SESSION['selected_traits']; + $_SESSION['training_trials'] = $_SESSION['selected_trials']; + $_SESSION['training_lines'] = $_SESSION['selected_lines']; + unset($_SESSION['selected_trials']); + unset($_SESSION['selected_lines']); + unset($_SESSION['filtered_lines']); + unset($_SESSION['filtered_markers']); + unset($_SESSION['clicked_buttons']); + } else { + echo "error - no selection found"; + } + } elseif ($command == "save_p") { + $_SESSION['predict_traits'] = $_SESSION['selected_traits']; + $_SESSION['predict_trials'] = $_SESSION['selected_trials']; + $_SESSION['predict_lines'] = $_SESSION['selected_lines']; + } elseif ($command == "clear") { + unset($_SESSION['selected_traits']); + unset($_SESSION['selected_trials']); + unset($_SESSION['selected_lines']); + unset($_SESSION['training_traits']); + unset($_SESSION['training_trials']); + unset($_SESSION['training_lines']); + unset($_SESSION['filtered_lines']); + unset($_SESSION['phenotype']); + } elseif ($command== "clear_p") { + unset($_SESSION['selected_traits']); + unset($_SESSION['selected_trials']); + unset($_SESSION['selected_lines']); + } + if (empty($_SESSION['selected_lines']) || empty($_SESSION['training_lines'])) { + ?> + +
Genome Wide Association (consensus genotype)
+ 1. Select a set of lines, trait, and trials (one trait).
+ 2. Select the genetic map which has the best coverage for this set.
+ 3. Return to this page and select model options then GWAS Analysis
+ +
Genome Wide Association (single genotype experiment)
+ 1. Select a set of lines by genotype experiment.
+ 2. Select a trait and phenotype trial.
+ 3. Select the genetic map which has the best coverage for this set.
+ 4. Return to this page and select model options then GWAS Analysis
+ +
Genomic Prediction
+ 1. Select a set of lines, trait, and trials (one trait).
+ 2. Return to this page and select G-BLUP Analysis for cross-validation of the training set. Then save Training Set.
+ 3. To select a validation set, select a new set of lines using a different trial, then return to this page for analysis.
+ 4. To select a prediction set, select a new set of lines without phenotype measurements, then return to this page for analysis.
+
+ +

Additional notes on GWAS and G-BLUP methods
+ "; + echo "SetTrialsLines"; + $p_uid = $_SESSION['training_traits']; + $p_uid = $p_uid[0]; + $sql = "select phenotypes_name from phenotypes where phenotype_uid = $p_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $row = mysqli_fetch_array($res); + echo "Training"; + if (!empty($_SESSION['training_trials'])) { + $tmp = $_SESSION['training_trials']; + $e_uid = implode(",",$tmp); + $sql = "select trial_code from experiments where experiment_uid IN ($e_uid)"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while ($row = mysqli_fetch_array($res)) { + echo "$row[0]
"; + } + } + echo ""; + if (count($_SESSION['training_lines']) > 0) { + $selectedlines = implode(",", $_SESSION['training_lines']); + $sql_option = " AND lr.line_record_uid IN ($selectedlines)"; + } else { + $sql_option = ""; + } + $sql = "SELECT count(DISTINCT lr.line_record_uid) + FROM tht_base as tb, phenotype_data as pd, phenotypes as p, line_records as lr + WHERE pd.tht_base_uid = tb.tht_base_uid + $sql_option + AND p.phenotype_uid = pd.phenotype_uid + AND lr.line_record_uid = tb.line_record_uid + AND pd.phenotype_uid = $p_uid + AND tb.experiment_uid IN ($e_uid)"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $row = mysqli_fetch_array($res); + echo "$row[0]"; + ?> + +

+ + + +
+ "; + } + } elseif (!empty($_SESSION['phenotype']) && !empty($_SESSION['selected_trials']) ) { + ?> + +
TraitsTrialsLinesGenetic Map +
+ "; + } + echo ""; + $tmp = $_SESSION['selected_trials']; + $e_uid = implode(",",$tmp); + $sql = "select trial_code from experiments where experiment_uid IN ($e_uid)"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while ($row = mysqli_fetch_array($res)) { + echo "$row[0]
"; + } + echo "
"; + $count = count($_SESSION['selected_lines']); + echo "$count"; + if (isset($_SESSION['geno_exps'])) { + $geno_exp = $_SESSION['geno_exps']; + $geno_str = $geno_exp[0]; + $sql = "select marker_uid from allele_bymarker_exp_101 where experiment_uid = $geno_str and pos is not null limit 10"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + if ($row = mysqli_fetch_array($res)) { + $sql = "select trial_code from experiments where experiment_uid = $geno_str"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $row = mysqli_fetch_array($res); + $name = $row[0]; + echo "using map from genotype experiment
$name"; + } elseif (isset($_SESSION['selected_map'])) { + $sql = "select mapset_name from mapset where mapset_uid = $map"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $row = mysqli_fetch_assoc($res); + $map_name = $row['mapset_name']; + echo "$map_name"; + } + } elseif (isset($_SESSION['selected_map'])) { + $sql = "select mapset_name from mapset where mapset_uid = $map"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $row = mysqli_fetch_assoc($res); + $map_name = $row['mapset_name']; + echo "$map_name"; + } + echo "
"; + if ($count < 10) { + echo "Warning: analysis may fail with only $count lines selected"; + } + $min_maf = 5; + $max_missing = 10; + $max_miss_line = 10; + $lines = $_SESSION['selected_lines']; + $count_markers = calculate_db($lines, $min_maf, $max_missing, $max_miss_line); + $count_lines = count($lines); + $estimate = ($count_markers * $count_lines) / 10000; + if ($count > 0) { + ?> +
Minimum MAF ≥
+ Remove markers missing > % of data
+ + Remove lines missing > % of data
+ + + + Remove trait outliers + +
+
+
+
+
+
+
+ + +
+ GWAS + principal components + +
method + EMMAX (faster but can underestimate significance)
+ EMMA with REML +
G-BLUP + +

+
+ + + + then continue to select prediction set +
+ +
+
+
+
+ + Warning, not a valid combination of traits, trials, and lines"; + } + } + ?> +

+ +

Genomic Selection

+ + 3) { + $histo_width = 800 + ($ntrials - 3) * 200; + } + + if(!file_exists($dir.$filename3)){ + $h = fopen($dir.$filename3, "w+"); + $png = "png(\"$dir$filename4\", width=$histo_width, height=300)\n"; + $cmd1 = "phenoData <- as.matrix(read.delim(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=1))\n"; + $cmd1 = "phenoData <- read.table(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=NULL)\n"; + $cmd2 = "phenolabel <- \"$phenolabel\"\n"; + $cmd3 = "phenounit <- \"$phenounit\"\n"; + $cmd4 = $triallabel; + fwrite($h, $png); + fwrite($h, $cmd1); + fwrite($h, $cmd2); + fwrite($h, $cmd3); + fwrite($h, $cmd4); + fclose($h); + } + exec("cat /tmp/tht/$filename3 R/GShisto.R | R --vanilla > /dev/null 2> /tmp/tht/$filename5"); + if (file_exists("/tmp/tht/$filename5")) { + $h = fopen("/tmp/tht/$filename5", "r"); + while ($line=fgets($h)) { + echo "$line
\n"; + } + fclose($h); + } + if (file_exists("/tmp/tht/$filename4")) { + print "
"; + } else { + echo "Error in R script R/GShisto.R
\n"; + } + } + + private function display_gwas_hits($h) { + global $mysqli; + echo "Top five marker scores from GWAS analysis
"; + echo "
markerchromposvalueexternal link (resource name)"; + $line= fgetcsv($h); + while ($line= fgetcsv($h)) { + $link = ""; + $sql = "select value, name_annotation, linkout_string_for_annotation + from markers, marker_annotations, marker_annotation_types + where markers.marker_uid = marker_annotations.marker_uid + and marker_annotations.marker_annotation_type_uid = marker_annotation_types.marker_annotation_type_uid + and marker_name = \"$line[1]\""; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while ($row = mysqli_fetch_assoc($res)) { + $reg_pattern = '/XXXX/'; + $replace_string = $row['value']; + $name = $row['name_annotation']; + $source_string = $row['linkout_string_for_annotation']; + $linkString = preg_replace($reg_pattern, $replace_string, $source_string); + if ($link == "") { + if ($linkString != "") { + $link = "$replace_string ($name)"; + } + } else { + if ($linkString != "") { + $link .= "
$replace_string ($name)"; + } + } + } + if ($count < 5) { + $markerlink = "$line[1]"; + echo "
$markerlink$line[2]$line[3]$line[4]$link\n"; + } + $count++; + } + fclose($h); + echo "
"; + } + + /** + * display gwas results + */ + private function status_gwas() { + $unique_str = $_GET['unq']; + $dir = '/tmp/tht/'; + $found = 1; + $filename9 = 'THTdownload_hmp_' . $unique_str. '.txt'; + $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; + $filename3 = 'THTdownload_gwa_' . $unique_str . '.R'; + $filename4 = 'THTdownload_gwa1_' . $unique_str . '.png'; + $filename7 = 'THTdownload_gwa2_' . $unique_str . '.png'; + $filename10 = 'THTdownload_gwa3_' . $unique_str . '.png'; + $filename5 = 'process_error_gwa_' . $unique_str . '.txt'; + $filename6 = 'R_error_gwa_' . $unique_str . '.txt'; + $filename1 = 'THT_result_' . $unique_str . '.csv'; + $filenameK = 'Kinship_matrix_' . $unique_str . '.csv'; + if (file_exists("/tmp/tht/$filename7")) { + } else { + //echo "$filename7 not ready
\n"; + $found = 0; + } + if (file_exists("/tmp/tht/$filename10")) { + } else { + //echo "$filename10 not ready
\n"; + $found = 0; + } + if (file_exists("/tmp/tht/$filename4")) { + } else { + //echo "$filename4 not ready
\n"; + $found = 0; + } + if (file_exists("/tmp/tht/$filename5")) { + $h = fopen("/tmp/tht/$filename5", "r"); + while ($line=fgets($h)) { + echo "$line
\n"; + } + fclose($h); + } + if (file_exists("/tmp/tht/$filename3")) { + // Extract the Trait name from the .R file. + $h = fopen("/tmp/tht/$filename3", "r"); + while ($line=fgets($h)) { + if (strpos($line, 'phenolabel') !== FALSE) { + $traitname = preg_replace('/phenolabel <- "(.*)"/', '$1', $line); + } + } + fclose($h); + } + if ($found) { + print "
"; + print "
"; + print "
"; + print "Trait: $traitname

"; + print "Export GWAS results to CSV file "; + print "with columns for marker name, chromosome, position, marker score

"; + print "Export Kinship matrix

"; + $count = 0; + $h = fopen("/tmp/tht/$filename1", "r"); + if ($h) { + $this->display_gwas_hits($h); + } + } else { + if (isset($_SESSION['filtered_ines'])) { + $lines = $_SESSION['filtered_lines']; + } else { + $lines = $_SESSION['selected_lines']; + } + if (isset($_SESSION['filtered_markers'])) { + $markers = $_SESSION['filtered_markers']; + } else { + $markers = $_SESSION['geno_exps_cnt']; + } + $estimate = count($lines) * count($markers); + $estimate = round($estimate/6000000,1); + echo "Results not ready yet. Estimated analysis time is $estimate minutes using default options.
"; + ?> + Select the "Check Results" button to retrieve results.
+ +
+
"; + } else { + $found = 0; + } + if (file_exists("/tmp/tht/$filename4")) { + print "
"; + if (isset($_SESSION['selected_trials'])) { + print "Export prediction to CSV file

"; + } else { + print "Cross-validation of training set using 5 folds and 2 repeats.
\n"; + print "Export Cross-validated prediction to CSV file

"; + } + } else { + $found = 0; + } + + if (file_exists("/tmp/tht/$filename5")) { + $h = fopen("/tmp/tht/$filename5", "r"); + while ($line=fgets($h)) { + echo "$line
\n"; + } + fclose($h); + } + if (file_exists("/tmp/tht/$filename6")) { + $h = fopen("/tmp/tht/$filename6", "r"); + while ($line=fgets($h)) { + echo "$line
\n"; + } + fclose($h); + } + if ($found == 0) { + $lines = $_SESSION['filtered_lines']; + $markers = $_SESSION['filtered_markers']; + $estimate = count($lines) + count($markers); + $estimate = round($estimate/700,1); + echo "Results not ready yet. Estimated analysis time is $estimate minutes.
"; + ?> + Select the "Check Results" button to retrieve results.
+ +
+ /dev/null 2> /tmp/tht/$filename5"); + if (file_exists("/tmp/tht/$filename7")) { + print "
"; + } else { + echo "Error in R script
\n"; + echo "cat /tmp/tht/$filename3 R/GSforT3.R | R --vanilla
"; + } + if (file_exists("/tmp/tht/$filename10")) { + print "
"; + } + if (file_exists("/tmp/tht/$filename4")) { + print "
"; + print "Trait: $phenolabel

"; + print "Export GWAS results to CSV file "; + print "with columns for marker name, chromosome, position, marker score

"; + print "Export Kinship matrix

"; + $count = 0; + $h = fopen("/tmp/tht/$filename1", "r"); + if($h) { + $this->display_gwas_hits($h); + } else { + echo "error - could not open $filename1\n"; + } + } + if (file_exists("/tmp/tht/$filename5")) { + $h = fopen("/tmp/tht/$filename5", "r"); + while ($line=fgets($h)) { + echo "$line
\n"; + } + fclose($h); + } + } + + /** + * run GWAS results in background and notify when complete + */ + private function run_gwa2() { + global $config; + global $mysqli; + $unique_str = $_GET['unq']; + $model_opt = $_GET['fixed2']; + $p3d = $_GET['p3d']; + if (isset($_SESSION['training_traits'])) { + $phenotype = $_SESSION['training_traits']; + $phenotype = $phenotype[0]; + //} elseif (isset($_SESSION['selected_traits'])) { use when multiple traits is working + } elseif (isset($_SESSION['phenotype'])) { + $phenotype = $_SESSION['phenotype']; + } + $sql = "select phenotypes_name, unit_name from phenotypes, units + where phenotypes.unit_uid = units.unit_uid + and phenotype_uid = $phenotype"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $row = mysqli_fetch_array($res); + $phenolabel = $row[0]; + //$unique_fld = chr(rand(65,80)).chr(rand(65,80)).chr(rand(65,80)).chr(rand(65,80)); + //mkdir("/tmp/tht/$unique_fld"); it would be better to put all files in directory + $dir = '/tmp/tht/'; + $filename9 = 'THTdownload_hmp_' . $unique_str. '.txt'; + $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; + $filename3 = 'THTdownload_gwa_' . $unique_str . '.R'; + $filename4 = 'THTdownload_gwa1_' . $unique_str . '.png'; + $filename7 = 'THTdownload_gwa2_' . $unique_str . '.png'; + $filename10 = 'THTdownload_gwa3_' . $unique_str . '.png'; + $filename5 = 'process_error_gwa_' . $unique_str . '.txt'; + $filename6 = 'R_error_gwa_' . $unique_str . '.txt'; + $filename1 = 'THT_result_' . $unique_str . '.csv'; + $filenameK = 'Kinship_matrix_' . $unique_str . 
'.csv'; + if(!file_exists($dir.$filename3)){ + $h = fopen($dir.$filename3, "w+"); + $png1 = "png(\"$dir$filename4\", width=1200, height=400)\n"; + $png2 = "png(\"$dir$filename7\", width=1200, height=400)\n"; + $png3 = "png(\"$dir$filename10\", width=1200, height=400)\n"; + $png4 = "dev.set(3)\n"; + $cmd3 = "phenoData <- read.table(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=NULL)\n"; + $cmd4 = "hmpData <- read.table(\"$dir$filename9\", header=TRUE, stringsAsFactors=FALSE, sep=\"\\t\", check.names = FALSE)\n"; + $cmd5 = "phenolabel <- \"$phenolabel\"\n"; + $cmd6 = "fileerr <- \"$dir$filename6\"\n"; + $cmd7 = "fileout <- \"$filename1\"\n"; + $cmd8 = "model_opt <- \"$model_opt\"\n"; + $cmd9 = "fileK <- \"$filenameK\"\n"; + if (isset($_SESSION['username'])) { + $emailAddr = $_SESSION['username']; + $emailAddr = "email <- \"$emailAddr\"\n"; + fwrite($h, $emailAddr); + $result_url = $config['base_url'] . "gensel.php?function=gwas_status&unq=$unique_str"; + $result_url = "result_url <- \"$result_url\"\n"; + fwrite($h, $result_url); + } + fwrite($h, $png1); + fwrite($h, $png2); + fwrite($h, $png3); + fwrite($h, $png4); + fwrite($h, $cmd3); + fwrite($h, $cmd4); + fwrite($h, $cmd5); + fwrite($h, $cmd6); + fwrite($h, $cmd7); + fwrite($h, $cmd8); + fwrite($h, $cmd9); + fwrite($h, "p3d <- $p3d\n"); + fwrite($h, "setwd(\"/tmp/tht/\")\n"); + fclose($h); + } + exec("cat /tmp/tht/$filename3 R/GSforGWA.R | R --vanilla > /dev/null 2> /tmp/tht/$filename5 &"); + + if (isset($_SESSION['filtered_lines'])) { + $lines = $_SESSION['filtered_lines']; + } else { + $lines = $_SESSION['selected_lines']; + } + if (isset($_SESSION['filtered_markers'])) { + $markers = $_SESSION['filtered_markers']; + } else { + $markers = $_SESSION['geno_exps_cnt']; + } + $estimate = count($lines) * count($markers); + $estimate = round($estimate/600000,1); + echo "Estimated analysis time is $estimate minutes using default options.
"; + $emailAddr = $_SESSION['username']; + if (isset($_SESSION['username'])) { + echo "An email will be sent to $emailAddr when the job is complete
\n"; + } else { + echo "If you Login a notification will be sent upon completion
\n"; + } + ?> + Select the "Check Results" button to retrieve results.
+ +
+ /dev/null 2> /tmp/tht/$filename5"); + if (file_exists("/tmp/tht/$filename10")) { + print "
"; + } else { + echo "Error in R script
\n"; + echo "cat /tmp/tht/$filename3 R/GSforT3.R | R --vanilla
"; + } + if (file_exists("/tmp/tht/$filename4")) { + print "
"; + //if (isset($_SESSION['selected_traits'])) { use when multiple traits is supported + if (isset($_SESSION['selected_trials'])) { + print "Export prediction to CSV file

"; + } else { + print "Cross-validation of training set using 5 folds and 2 repeats.
\n"; + print "Export Cross-validated prediction to CSV file

"; + } + } else { + echo "Error in R script
\n"; + echo "cat /tmp/tht/$filename3 R/GSforT3.R | R --vanilla
"; + } + + if (file_exists("/tmp/tht/$filename5")) { + $h = fopen("/tmp/tht/$filename5", "r"); + while ($line=fgets($h)) { + echo "$line
\n"; + } + fclose($h); + } + if (file_exists("/tmp/tht/$filename6")) { + $h = fopen("/tmp/tht/$filename6", "r"); + while ($line=fgets($h)) { + echo "$line
\n"; + } + fclose($h); + } + + } + + /** + * run rrBLUP R script in background and notify when complete + */ + private function run_rscript2() { + $unique_str = $_GET['unq']; + $filename1 = 'THTdownload_hapmap_' . $unique_str . '.txt'; + $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; + $filename3 = 'THTdownload_gensel_' . $unique_str . '.R'; + $filename10 = 'THTdownload_gensel2_' . $unique_str . '.png'; + $filename4 = 'THTdownload_gensel_' . $unique_str . '.png'; + $filename5 = 'THT_process_error_' . $unique_str . '.txt'; + $filename6 = 'THT_R_error_' . $unique_str . '.txt'; + $filename7 = 'THT_result_' . $unique_str . '.csv'; + exec("cat /tmp/tht/$filename3 R/GSforT34.R | R --vanilla > /dev/null 2> /tmp/tht/$filename5 &"); + $lines = $_SESSION['filtered_lines']; + $markers = $_SESSION['filtered_markers']; + $estimate = count($lines) + count($markers); + $estimate = round($estimate/700,1); + echo "Estimated analysis time is $estimate minutes.
"; + $emailAddr = $_SESSION['username']; + if (isset($_SESSION['username'])) { + echo "An email will be sent to $emailAddr when the job is complete
\n"; + } else { + echo "If you Login a notification will be sent upon completion
\n"; + } + ?> + Select the "Check Results" button to retrieve results.
+ +
+ + + 0) { + $selectedlines = $_SESSION['training_lines']; + foreach ($selectedlines as $uid) { + if (!in_array($uid,$all_lines)) { + $all_lines[] = $uid; + } + } + } + + $dir = '/tmp/tht/'; + $filename1 = 'THTdownload_snp_p_' . $unique_str . '.txt'; + $filename8 = 'THTdownload_snp_t_' . $unique_str . '.txt'; + $filename9 = 'THTdownload_hmp_' . $unique_str . '.txt'; + $filename2 = 'THTdownload_traits_' . $unique_str . '.txt'; + $filename3 = 'THTdownload_gensel_' . $unique_str . '.R'; + $filename4 = 'THTdownload_gensel_' . $unique_str . '.png'; + $filename10 = 'THTdownload_gensel2_' . $unique_str . '.png'; + $filename5 = 'THT_process_error_' . $unique_str . '.txt'; + $filename6 = 'THT_R_error_' . $unique_str . '.txt'; + $filename7 = 'THT_result_' . $unique_str . '.csv'; + + //create genotype file for prediction set + if ($version == "V4") { + if ($training_lines == "") { + if(!file_exists($dir.$filename8)){ + $dtype = "qtlminer"; + $h = fopen($dir.$filename8, "w+"); + fwrite($h,$this->type2_build_markers_download($lines,$markers,$dtype)); + fclose($h); + } + } else { + //remove duplicate lines from prediction + foreach ($lines as $key => $value) { + if(in_array($value, $training_lines)){ + unset($lines[$key]); + } + } + if(!file_exists($dir.$filename8)) { + $dtype = "qtlminer"; + $h = fopen($dir.$filename8, "w+"); + fwrite($h,$this->type2_build_markers_download($training_lines,$markers,$dtype)); + fclose($h); + } + if(!file_exists($dir.$filename1)) { + $dtype = "qtlminer"; + $h = fopen($dir.$filename1, "w+"); + fwrite($h,$this->type2_build_markers_download($lines,$markers,$dtype)); + fclose($h); + } + } + } elseif ($version == "V3") { + if (isset($_SESSION['geno_exps'])) { + $experiment = $_SESSION['geno_exps']; + $geno_str = $experiment[0]; + $tmp = count($markers); + if(!file_exists($dir.$filename9)){ + $dtype = "qtlminer"; + $h = fopen($dir.$filename9, "w+"); + $output = type4BuildMarkersDownload($geno_str, $min_maf, $max_missing, $dtype, $h); + fclose($h); + } + 
} elseif ($training_lines == "") { + if(!file_exists($dir.$filename9)){ + $dtype = "qtlminer"; + $h = fopen($dir.$filename9, "w+"); + fwrite($h,$this->type3_build_markers_download($lines,$markers,$dtype)); + fclose($h); + } + } else { + if(!file_exists($dir.$filename9)){ + $dtype = "qtlminer"; + $h = fopen($dir.$filename9, "w+"); + fwrite($h,$this->type3_build_markers_download($training_lines,$markers,$dtype)); + fclose($h); + } + } + } + + if(!file_exists($dir.$filename2)){ + $h = fopen($dir.$filename2, "w+"); + $datasets_exp = ""; + $subset = "yes"; + fwrite($h,$this->type1_build_tassel_traits_download($experiments_t,$phenotype,$datasets_exp,$subset)); + fclose($h); + } + if(!file_exists($dir.$filename3)){ + $h = fopen($dir.$filename3, "w+"); + $png = "png(\"$dir$filename4\", width=900, height=500)\n"; + $png2 = "png(\"$dir$filename10\", width=600, height=500)\n"; + $cmd1 = "snpData_p <- read.table(\"$dir$filename1\", header=TRUE, stringsAsFactors=FALSE, sep=\"\\t\", row.names=1)\n"; + $cmd2 = "snpData_t <- read.table(\"$dir$filename8\", header=TRUE, stringsAsFactors=FALSE, sep=\"\\t\", row.names=1)\n"; + $cmd3 = "phenoData <- read.table(\"$dir$filename2\", header=TRUE, na.strings=\"-999\", stringsAsFactors=FALSE, sep=\"\\t\", row.names=NULL)\n"; + $cmd5 = "fileerr <- \"$filename6\"\n"; + $cmd6 = "fileout <- \"$filename7\"\n"; + $cmd7 = "phenolabel <- \"$phenolabel\"\n"; + $cmd8 = "common_code <- \"" . $config['root_dir'] . "R/AmatrixStructure.R\"\n"; + $cmd9 = $triallabel; + if (isset($_SESSION['username'])) { + $emailAddr = $_SESSION['username']; + $emailAddr = "email <- \"$emailAddr\"\n"; + fwrite($h, $emailAddr); + $result_url = $config['base_url'] . 
"gensel.php?function=pred_status&unq=$unique_str"; + $result_url = "result_url <- \"$result_url\"\n"; + fwrite($h, $result_url); + } + + fwrite($h, $png); + fwrite($h, $png2); + if ($training_lines != "") { + fwrite($h, $cmd1); + } + fwrite($h, $cmd2); + fwrite($h, $cmd3); + fwrite($h, $cmd5); + fwrite($h, $cmd6); + fwrite($h, $cmd7); + fwrite($h, $cmd8); + fwrite($h, $cmd9); + fwrite($h, "model <- \"$model_opt\"\n"); + fwrite($h, "setwd(\"/tmp/tht/\")\n"); + fclose($h); + } + + if (($version == "V4") && (isset($_SESSION['training_lines']))) { + if (count($_SESSION['training_lines']) < 50) { + echo "skip CrossValidation because traing set has less than 50 lines
\n"; + } + } + } + + /** + * generate download files in qltminer format + * @param unknown_type $experiments + * @param unknown_type $traits + * @param unknown_type $datasets + */ + function type1_build_traits_download($experiments, $traits, $datasets) + { + global $mysqli; + + $output = 'Experiment' . $this->delimiter . 'Inbred'; + $traits = explode(',', $traits); + + + $select = "SELECT experiments.trial_code, line_records.line_record_name"; + $from = " FROM tht_base + JOIN experiments ON experiments.experiment_uid = tht_base.experiment_uid + JOIN line_records ON line_records.line_record_uid = tht_base.line_record_uid "; + foreach ($traits as $trait) { + $from .= " JOIN ( + SELECT p.phenotypes_name, pd.value, pd.tht_base_uid, pmd.number_replicates, pmd.experiment_uid + FROM phenotypes AS p, phenotype_data AS pd, phenotype_mean_data AS pmd + WHERE pd.phenotype_uid = p.phenotype_uid + AND pmd.phenotype_uid = p.phenotype_uid + AND p.phenotype_uid = ($trait)) AS t$trait + + ON t$trait.tht_base_uid = tht_base.tht_base_uid AND t$trait.experiment_uid = tht_base.experiment_uid"; + $select .= ", t$trait.phenotypes_name as name$trait, t$trait.value as value$trait, t$trait.number_replicates as nreps$trait"; + } + $where = " WHERE tht_base.experiment_uid IN ($experiments) + AND tht_base.check_line = 'no' + AND tht_base.datasets_experiments_uid in ($datasets)"; + + $res = mysqli_query($mysqli, $select.$from.$where) or die(mysqli_error($mysqli)); + + $namevaluekeys = null; + $valuekeys = array(); + while($row = mysqli_fetch_assoc($res)) { + if ($namevaluekeys == null) + { + $namevaluekeys = array_keys($row); + unset($namevaluekeys[array_search('trial_code', $namevaluekeys)]); + //unset($namevaluekeys[array_search('number_replications', $namevaluekeys)]); + unset($namevaluekeys[array_search('line_record_name', $namevaluekeys)]); + + foreach($namevaluekeys as $namevaluekey) { + if (stripos($namevaluekey, 'name') !== FALSE) { + $output .= $this->delimiter . 
"{$row[$namevaluekey]}" . $this->delimiter . "N"; + } else { + array_push($valuekeys, $namevaluekey); + } + } + $output .= "\n"; + } + $output .= "{$row['trial_code']}" . $this->delimiter . "{$row['line_record_name']}"; + foreach($valuekeys as $valuekey) { + if (is_null($row[$valuekey])) + $row[$valuekey] = 'N/A'; + $output .= $this->delimiter . "{$row[$valuekey]}" ; + } + $output .= "\n"; + } + + return $output; + } + + /** + * Build trait download file for Tassel program interface + * @param unknown_type $experiments + * @param unknown_type $traits + * @param unknown_type $datasets + * @param unknown_type $subset + * @return string + * + * modified to work with only one trait + * if trait measured more than once then add AVG() and GROUP by + * for R script the line names have to be quoted or special characters will cause problems + */ + function type1_build_tassel_traits_download($experiments, $traits, $datasets, $subset) { + global $mysqli; + $delimiter = "\t"; + $output = ''; + $outputheader1 = ''; + $outputheader3 = ""; + + //only use first trait + $pattern = "/([0-9]+)/"; + if (preg_match($pattern,$traits,$match)) { + $traits = $match[1]; + } else { + echo "error - can not identify trait $traits\n"; + die(); + } + + if (isset($_SESSION['filtered_lines'])) { + $lines = $_SESSION['filtered_lines']; + } else { + die("Error: should have lines selected
\n"); + } + $selectedlines = implode(",", $lines); + $outputheader2 = "gid" . $delimiter . "pheno" . $delimiter . "trial" . $delimiter . "year"; + + $sql_option = ""; + if ($subset == "yes" && count($_SESSION['filtered_lines']) > 0) { + $selectedlines = implode(",", $_SESSION['filtered_lines']); + $sql_option = " AND lr.line_record_uid IN ($selectedlines)"; + } else { + die("Error: should have lines selected
\n"); + } + if (preg_match("/\d/",$experiments)) { + $sql_option .= "AND tb.experiment_uid IN ($experiments)"; + } + if (preg_match("/\d/",$datasets)) { + $sql_option .= "AND ((tht_base.datasets_experiments_uid in ($datasets) AND tht_base.check_line='no') OR (tht_base.check_line='yes'))"; + } + + // get a list of all line names in the selected datasets and experiments, + // INCLUDING the check lines // AND tht_base.check_line IN ('no') + $sql = "SELECT DISTINCT lr.line_record_name, lr.line_record_uid + FROM line_records as lr, tht_base as tb, phenotype_data as pd + WHERE lr.line_record_uid=tb.line_record_uid + AND pd.tht_base_uid = tb.tht_base_uid + AND pd.phenotype_uid = $traits + $sql_option"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while($row = mysqli_fetch_array($res)) { + $lines_names[] = $row['line_record_name']; + $line_uid[] = $row['line_record_uid']; + } + $nlines = count($lines_names); + //die($sql . "
" . $nlines); + + $outputheader1 = "$nlines".$delimiter."$ncols".$delimiter.$nheaderlines; + $output = $outputheader2."\n"; + + //add lines from pred set + if(isset($_SESSION['training_lines'])) { + if (isset($_SESSION['selected_lines'])){ + $selectedlines = $_SESSION['selected_lines']; + } + if (isset($_SESSION['selected_trials'])) { + $selectedtrials = $_SESSION['selected_trials']; + $selectedtrials = implode(",",$selectedtrials); + } + } else { + $selectedlines = array(); + $selectedtrials = ""; + } + if (preg_match("/\d/",$selectedtrials)) { + $sql_option = " WHERE tb.experiment_uid IN ($selectedtrials) AND "; + } else{ + $sql_option = " WHERE "; + } + foreach ($selectedlines as $uid) { + if (!in_array($uid,$line_uid)) { + $sql = "SELECT line_record_name, tb.experiment_uid, experiment_year as exper + from line_records as lr, tht_base as tb, experiments as exp + $sql_option + lr.line_record_uid=tb.line_record_uid + and tb.experiment_uid = exp.experiment_uid + and lr.line_record_uid = $uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + if (preg_match("/\d/",$selectedtrials)) { //for case where there are phenotype measurements + while ($row = mysqli_fetch_array($res)) { + $line_name = $row[0]; + $exper = $row[1]; + $year = $row[2]; + $sql = "select pd.value as value + from tht_base as tb, phenotype_data as pd + WHERE tb.experiment_uid = $exper AND + tb.line_record_uid = $uid + AND pd.tht_base_uid = tb.tht_base_uid + AND pd.phenotype_uid = $traits"; + $res2 = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + if ($row2 = mysqli_fetch_array($res2)) { + $value = $row2['value']; + } else { + $value = "-999"; + } + + $outline = $line_name.$delimiter.$value.$delimiter.$exper.$delimiter.$year."\n"; + $output .= $outline; + } + } else { //for case where there are no phenotype measurements + if ($row = mysqli_fetch_array($res)) { + $line_name = $row[0]; + $year = $row[2]; + $exper = 0; //use 0 to indicate the prediction set + $value = 
"-999"; + $outline = $line_name.$delimiter.$value.$delimiter.$exper.$delimiter.$year."\n"; + $output .= $outline; + } + } + } else { + //echo "dropped from prediction $uid
\n"; + } + } + + // loop through all the lines in the file + for ($i=0;$i<$nlines;$i++) { + if (preg_match("/\d/",$experiments)) { + $sql_option = " WHERE tb.experiment_uid IN ($experiments) AND "; + } else { + $sql_option = " WHERE "; + } + $sql = "SELECT pd.value as value, pd.phenotype_uid, tb.experiment_uid as exper, experiment_year, tb.tht_base_uid + FROM tht_base as tb, phenotype_data as pd, experiments as exp + $sql_option + tb.line_record_uid = $line_uid[$i] + AND pd.tht_base_uid = tb.tht_base_uid + AND tb.experiment_uid = exp.experiment_uid + AND pd.phenotype_uid = $traits"; + // GROUP BY tb.tht_base_uid, pd.phenotype_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $found = 0; + while ($row = mysqli_fetch_array($res)) { + $found = 1; + $outline = "'$lines_names[$i]'".$delimiter.$row['value'].$delimiter.$row['exper'].$delimiter.$row['experiment_year']."\n"; + if ($removeOutlier == "Y") { + $line = $lines_names[$i]; + $exp = $row['exper']; + if (isset($_SESSION['outliers'][$line][$traits][$exp])) { + echo "skip\n"; + continue; + } + } + $output .= $outline; + } + if ($found == 0) { + $outline = "'$lines_names[$i]'".$delimiter."999".$delimiter."999".$delimiter."999\n"; + $output .= $outline; + } + + } + + return $output; + } + + /** + * Build trait download file for Tassel program interface + * @param string $experiments + * @param unknown_type $traits + * @param unknown_type $lines + * @param unknown_type $subset + * @return string + */ + function type2_build_tassel_traits_download($experiments, $traits, $lines, $subset) + { + global $mysqli; + $delimiter = "\t"; + $output = ''; + $outputheader1 = ''; + $outputheader2 = ''; + $outputheader3 = ''; + + //count number of traits and number of experiments + $ntraits=substr_count($traits, ',')+1; + $nexp=substr_count($experiments, ',')+1; + + // figure out which traits are at which location + if ($experiments=="") { + $sql_option = ""; + } else { + $sql_option = "AND tb.experiment_uid IN 
($experiments)"; + } + + $selectedlines = implode(",", $lines); + $sql_option = $sql_option . " AND tb.line_record_uid IN ($selectedlines)"; + $sql = "SELECT DISTINCT e.trial_code, e.experiment_uid, p.phenotypes_name,p.phenotype_uid + FROM experiments as e, tht_base as tb, phenotype_data as pd, phenotypes as p + WHERE + e.experiment_uid = tb.experiment_uid + $sql_option + AND pd.tht_base_uid = tb.tht_base_uid + AND p.phenotype_uid = pd.phenotype_uid + AND pd.phenotype_uid IN ($traits) + ORDER BY p.phenotype_uid,e.experiment_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $ncols = mysqli_num_rows($res); + while($row = mysqli_fetch_array($res)) { + $outputheader2 .= str_replace(" ","_",$row['phenotypes_name']).$delimiter; + $outputheader3 .= $row['trial_code'].$delimiter; + $keys[] = $row['phenotype_uid'].$row['experiment_uid']; + } + $nexp=$ncols; + + $sql = "SELECT DISTINCT line_records.line_record_name, line_records.line_record_uid + FROM line_records + where line_record_uid IN ($selectedlines)"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while($row = mysqli_fetch_array($res)) { + $lines_names[] = $row['line_record_name']; + $line_uid[] = $row['line_record_uid']; + } + $nlines = count($lines); + //die($sql . "
" . $nlines); + + if ($nexp ===1){ + $nheaderlines = 1; + } else { + $nheaderlines = 2; + } + $outputheader1 = "$nlines".$delimiter."$ncols".$delimiter.$nheaderlines; + //if (DEBUG>1) echo $outputheader1."\n".$outputheader2."\n".$outputheader3."\n"; + // $firephp->log("number traits and lines ".$outputheader1); + if ($nexp ===1){ + $output = $outputheader2."\n"; + } else { + $output = $outputheader3."\n"; + } + + + // loop through all the lines in the file + for ($i=0;$i<$nlines;$i++) { + $outline = $lines_names[$i].$delimiter; + // get selected traits for this line in the selected experiments, change for multiple check lines + /* $sql = "SELECT pd.phenotype_uid, pd.value, tb.experiment_uid + FROM tht_base as tb, phenotype_data as pd + WHERE + tb.line_record_uid = $line_uid[$i] + AND tb.experiment_uid IN ($experiments) + AND pd.tht_base_uid = tb.tht_base_uid + AND pd.phenotype_uid IN ($traits) + ORDER BY pd.phenotype_uid,tb.experiment_uid";*/ + // dem 8oct10: Don't round the data. + // $sql = "SELECT avg(cast(pd.value AS DECIMAL(9,1))) as value,pd.phenotype_uid,tb.experiment_uid + if (preg_match("/\d/",$experiments)) { + $sql_option = " WHERE tb.experiment_uid IN ($experiments) AND "; + } else { + $sql_option = " WHERE "; + } + $sql = "SELECT pd.value as value, pd.phenotype_uid, tb.experiment_uid, tb.tht_base_uid + FROM tht_base as tb, phenotype_data as pd + $sql_option + tb.line_record_uid = $line_uid[$i] + AND pd.tht_base_uid = tb.tht_base_uid + AND pd.phenotype_uid IN ($traits) + GROUP BY tb.tht_base_uid, pd.phenotype_uid"; + //echo "$i $nlines $sql
"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + // // $firephp->log("sql ".$i." ".$sql); + $outarray = array_fill(0,$ncols,-999); + //// $firephp->table('outarray label values', $outarray); + //$outarray = array_fill_keys( $keys , -999); + $outarray = array_combine($keys , $outarray); + //// $firephp->table('outarray label ', $outarray); + while ($row = mysqli_fetch_array($res)) { + $keyval = $row['phenotype_uid'].$row['experiment_uid']; + // $firephp->log("keyvals ".$keyval." ".$row['value']); + $outarray[$keyval]= $row['value']; + } + $outline .= implode($delimiter,$outarray)."\n"; + //// $firephp->log("outputline ".$i." ".$outline); + $output .= $outline; + + } + + return $output; + } + + /** + * build genotype data file for tassle V2 and V3 + * @param unknown_type $experiments + * @param unknown_type $dtype + */ + function type1_build_markers_download($experiments,$dtype) + { + global $mysqli; + $outputheader = ''; + $output = ''; + $doneheader = false; + $delimiter ="\t"; + + if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) + $max_missing = $_GET['mm']; + if ($max_missing>100) + $max_missing = 100; + elseif ($max_missing<0) + $max_missing = 0; + // $firephp->log("in sort markers2"); + $min_maf = 0.01;//IN PERCENT + if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) + $min_maf = $_GET['mmaf']; + if ($min_maf>100) + $min_maf = 100; + elseif ($min_maf<0) + $min_maf = 0; + // $firephp->log("in sort markers".$max_missing." 
".$min_maf); + + //get lines and filter to get a list of markers which meet the criteria selected by the user + $sql_mstat = "SELECT af.marker_uid as marker, m.marker_name as name, SUM(af.aa_cnt) as sumaa, SUM(af.missing)as summis, SUM(af.bb_cnt) as sumbb, + SUM(af.total) as total, SUM(af.ab_cnt) AS sumab + FROM allele_frequencies AS af, markers as m + WHERE m.marker_uid = af.marker_uid + AND af.experiment_uid in ($experiments) + group by af.marker_uid"; + + $res = mysqli_query($myslqi, $sql_mstat) or die(mysqli_error($mysqli)); + $num_maf = $num_miss = 0; + while ($row = mysqli_fetch_array($res)){ + $maf = round(100*min((2*$row["sumaa"]+$row["sumab"])/(2*$row["total"]),($row["sumab"]+2*$row["sumbb"])/(2*$row["total"])),1); + $miss = round(100*$row["summis"]/$row["total"],1); + if (($maf >= $min_maf)AND ($miss<=$max_missing)) { + $marker_names[] = $row["name"]; + $outputheader .= $row["name"].$delimiter; + $marker_uid[] = $row["marker"]; + } + } + $nelem = count($marker_names); + if ($nelem == 0) { + die("error - no genotype or marker data for this experiment, experiment_uid=$experiments"); + } + $marker_uid = implode(",",$marker_uid); + + if ($dtype=='qtlminer') { + $lookup = array( + 'AA' => '1', + 'BB' => '-1', + '--' => 'NA', + 'AB' => '0' + ); + } else { + $lookup = array( + 'AA' => '1:1', + 'BB' => '2:2', + '--' => '?', + 'AB' => '1:2' + ); + } + + // make an empty line with the markers as array keys, set default value + // to the default missing value for either qtlminer or tassel + // places where the lines may have different values + + if ($dtype =='qtlminer') { + $empty = array_combine($marker_names,array_fill(0,$nelem,'NA')); + } else { + $empty = array_combine($marker_names,array_fill(0,$nelem,'?')); + } + + + $sql = "SELECT line_record_name, marker_name AS name, + alleles AS value + FROM + allele_cache as a + WHERE + a.marker_uid IN ($marker_uid) + AND a.experiment_uid IN ($experiments) + ORDER BY a.line_record_uid, a.marker_uid"; + + + $last_line = 
"some really silly name that noone would call a plant"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + + $outarray = $empty; + $cnt = $num_lines = 0; + while ($row = mysqli_fetch_array($res)){ + //first time through loop + if ($cnt==0) { + $last_line = $row['line_record_name']; + } + + if ($last_line != $row['line_record_name']){ + // Close out the last line + $output .= "$last_line\t"; + $outarray = implode($delimiter,$outarray); + $output .= $outarray."\n"; + //reset output arrays for the next line + $outarray = $empty; + $mname = $row['name']; + $outarray[$mname] = $lookup[$row['value']]; + $last_line = $row['line_record_name']; + $num_lines++; + } else { + $mname = $row['name']; + $outarray[$mname] = $lookup[$row['value']]; + } + $cnt++; + } + //NOTE: there is a problem with the last line logic here. Must fix. + //save data from the last line + $output .= "$last_line\t"; + $outarray = implode($delimiter,$outarray); + $output .= $outarray."\n"; + $num_lines++; + + if ($dtype =='qtlminer') { + return $outputheader."\n".$output; + } else { + return $num_lines.$delimiter.$nelem.":2\n".$outputheader."\n".$output; + } + } + + /** + * build file listing conflicts in genotype data + * @param unknown_type $experiments + * @param unknown_type $dtype + */ + function type1_build_conflicts_download($experiments,$dtype) { + global $mysqli; + //get lines and filter to get a list of markers which meet the criteria selected by the user + $sql_mstat = "SELECT af.marker_uid as marker, m.marker_name as name, SUM(af.aa_cnt) as sumaa, SUM(af.missing)as summis, SUM(af.bb_cnt) as sumbb, + SUM(af.total) as total, SUM(af.ab_cnt) AS sumab + FROM allele_frequencies AS af, markers as m + WHERE m.marker_uid = af.marker_uid + AND af.experiment_uid in ($experiments) + group by af.marker_uid"; + + $res = mysqli_query($mysqli, $sql_mstat) or die(mysqli_error($mysqli)); + $num_maf = $num_miss = 0; + while ($row = mysqli_fetch_array($res)){ + $maf = 
round(100*min((2*$row["sumaa"]+$row["sumab"])/(2*$row["total"]),($row["sumab"]+2*$row["sumbb"])/(2*$row["total"])),1); + $miss = round(100*$row["summis"]/$row["total"],1); + if (($maf >= $min_maf)AND ($miss<=$max_missing)) { + $marker_uid[] = $row["marker"]; + } + } + $marker_uid = implode(",",$marker_uid); + $output = "line name\tmarker name\talleles\texperiment\n"; + $query = "select l.line_record_name, m.marker_name, a.alleles, e.trial_code + from allele_conflicts a, line_records l, markers m, experiments e + where a.line_record_uid = l.line_record_uid + and a.marker_uid = m.marker_uid + and a.experiment_uid = e.experiment_uid + and a.alleles != '--' + and a.marker_uid IN ($marker_uid) + order by l.line_record_name, m.marker_name, e.trial_code"; + $res = mysqli_query($mysqli, $query) or die(mysqli_error($mysqli)); + if (mysqli_num_rows($res)>0) { + while ($row = mysqli_fetch_row($res)){ + $output.= "$row[0]\t$row[1]\t$row[2]\t$row[3]\n"; + } + } + return $output; + } + + /** + * build genotype data file when given set of lines and markers + * @param unknown_type $lines + * @param unknown_type $markers + * @param unknown_type $dtype + */ + function type2_build_markers_download($lines,$markers,$dtype) + { + global $mysqli; + $outputheader = ''; + $output = ''; + $doneheader = false; + $delimiter ="\t"; + $max_missing = 10; + $min_maf = 5; + + if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) + $max_missing = $_GET['mm']; + if ($max_missing>100) + $max_missing = 100; + elseif ($max_missing<0) + $max_missing = 0; + // $firephp->log("in sort markers2"); + if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) + $min_maf = $_GET['mmaf']; + if ($min_maf>100) + $min_maf = 100; + elseif ($min_maf<0) + $min_maf = 0; + // $firephp->log("in sort markers".$max_missing." 
".$min_maf); + + if (count($markers)>0) { + $markers_str = implode(",", $markers); + } else { + die("error - markers should be selected before download\n"); + } + if (count($lines)>0) { + $lines_str = implode(",", $lines); + } else { + $lines_str = ""; + die("error - must make line selection first
\n"); + } + + //generate an array of selected markers that can be used with isset statement + foreach ($markers as $temp) { + $marker_lookup[$temp] = 1; + } + + $sql = "select marker_uid, marker_name from allele_byline_idx order by marker_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $i=0; + while ($row = mysqli_fetch_array($res)) { + $marker_list[$i] = $row[0]; + $marker_list_name[$i] = $row[1]; + $i++; + } + + foreach ($marker_list as $i => $marker_id) { + $marker_name = $marker_list_name[$i]; + if (isset($marker_lookup[$marker_id])) { + $marker_names[] = $marker_name; + $outputheader .= $marker_name.$delimiter; + $marker_uid[] = $marker_id; + } + } + + if ($dtype=='qtlminer') { + $lookup = array( + 'AA' => '1', + 'BB' => '-1', + '--' => 'NA', + 'AB' => '0', + '' => 'NA' + ); + } else { + $lookup = array( + 'AA' => '1:1', + 'BB' => '2:2', + '--' => '?', + 'AB' => '1:2', + '' => '?' + ); + } + + foreach ($lines as $line_record_uid) { + $sql = "select line_record_name, alleles from allele_byline where line_record_uid = $line_record_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + if ($row = mysqli_fetch_array($res)) { + $outarray2 = array(); + $outarray2[] = $row[0]; + $alleles = $row[1]; + $outarray = explode(',',$alleles); + $i=0; + foreach ($outarray as $allele) { + $marker_id = $marker_list[$i]; + if (isset($marker_lookup[$marker_id])) { + $outarray2[]=$lookup[$allele]; + } + $i++; + } + } else { + $sql = "select line_record_name from line_records where line_record_uid = $line_record_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + if ($row = mysqli_fetch_array($res)) { + $outarray2 = array(); + $outarray2[] = $row[0]; + $i=0; + foreach ($marker_list as $marker_id) { + if (isset($marker_lookup[$marker_id])) { + $outarray2[]=$lookup[""]; + } + $i++; + } + } else { + die("error - could not find uid\n"); + } + } + $outarray = implode($delimiter,$outarray2); + $output .= $outarray . 
"\n"; + } + $nelem = count($marker_names); + $num_lines = count($lines); + if ($nelem == 0) { + die("error - no genotype or marker data for this selection"); + } + + // make an empty line with the markers as array keys, set default value + // to the default missing value for either qtlminer or tassel + // places where the lines may have different values + + if ($dtype =='qtlminer') { + $empty = array_combine($marker_names,array_fill(0,$nelem,'NA')); + } else { + $empty = array_combine($marker_names,array_fill(0,$nelem,'?')); + } + + if ($dtype =='qtlminer') { + return $outputheader."\n".$output; + } else { + return $num_lines.$delimiter.$nelem.":2\n".$outputheader."\n".$output; + } + } + + /** + * build genotype data files for tassel V4 + * @param unknown_type $lines + * @param unknown_type $markers + * @param unknown_type $dtype + */ + function type3_build_markers_download($lines,$markers,$dtype) + { + global $mysqli; + $output = ''; + $outputheader = ''; + $delimiter ="\t"; + + if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) { + $max_missing = $_GET['mm']; + } + if ($max_missing>100) { + $max_missing = 100; + } elseif ($max_missing<0) { + $max_missing = 0; + } + $min_maf = 0.01;//IN PERCENT + if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) { + $min_maf = $_GET['mmaf']; + } + if ($min_maf>100) { + $min_maf = 100; + } elseif ($min_maf<0) { + $min_maf = 0; + } + if (isset($_SESSION['selected_map'])) { + $selected_map = $_SESSION['selected_map']; + } else { + $selected_map = 1; + } + + if (count($markers)>0) { + $markers_str = implode(",", $markers); + } else { + die("Error - markers should be selected before analysis"); + } + if (count($lines)>0) { + $lines_str = implode(",", $lines); + } else { + die("Error - lines should be selected before analysis"); + } + + //generate an array of selected lines that can be used with isset statement + foreach ($lines as $temp) { + $line_lookup[$temp] = 1; + } + + $sql = 
"select line_record_uid, line_record_name from allele_bymarker_idx order by line_record_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $i=0; + while ($row = mysqli_fetch_array($res)) { + $line_list[$i] = $row[0]; + $line_list_name[$i] = $row[1]; + $i++; + } + + //order the markers by map location + $sql = "select markers.marker_uid, markers.marker_name, mim.chromosome, mim.start_position from markers, markers_in_maps as mim, map, mapset + where markers.marker_uid IN ($markers_str) + AND mim.marker_uid = markers.marker_uid + AND mim.map_uid = map.map_uid + AND map.mapset_uid = mapset.mapset_uid + AND mapset.mapset_uid = $selected_map + order by mim.chromosome, CAST(1000*mim.start_position as UNSIGNED), BINARY markers.marker_name"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while ($row = mysqli_fetch_array($res)) { + $marker_uid = $row[0]; + $chr = $row[2]; + $pos = $row[3]; + $marker_list_mapped[$marker_uid] = $pos; + $marker_list_chr[$marker_uid] = $chr; + } + + $marker_list_all = $marker_list_mapped; + //generate an array of selected markers and add map position if available + $sql = "select marker_uid, marker_name, A_allele, B_allele, marker_type_name from markers, marker_types + where marker_uid IN ($markers_str) + AND markers.marker_type_uid = marker_types.marker_type_uid + order by BINARY marker_name"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while ($row = mysqli_fetch_array($res)) { + $marker_uid = $row[0]; + $marker_name = $row[1]; + if (isset($marker_list_all[$marker_uid])) { + } else { + $marker_list_all[$marker_uid] = 0; + } + if (preg_match("/[A-Z]/",$row[2]) && preg_match("/[A-Z]/",$row[3])) { + $allele = $row[2] . "/" . $row[3]; + } elseif (preg_match("/DArT/",$row[4])) { + $allele = $row[2] . "/" . 
$row[3]; + } else { + $allele = "N/N"; + } + $marker_list_name[$marker_uid] = $marker_name; + $marker_list_allele[$marker_uid] = $allele; + } + + //get location in allele_byline for each marker + $sql = "select marker_uid, marker_name from allele_byline_idx order by marker_uid"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + $i=0; + while ($row = mysqli_fetch_array($res)) { + $marker_idx_list[$row[0]] = $i; + $i++; + } + + //get header + $empty = array(); + $outputheader = "rs\talleles\tchrom\tpos"; + $sql = "select line_record_name from line_records where line_record_uid IN ($lines_str)"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while ($row = mysqli_fetch_array($res)) { + $name = $row[0]; + $outputheader .= "\t$name"; + $empty[$name] = "NN"; + } + + //using a subset of markers so we have to translate into correct index + //if there is no map then use chromosome 0 and index for position + $pos_index = 0; + foreach ($marker_list_all as $marker_id => $val) { + $marker_idx = $marker_idx_list[$marker_id]; + $marker_name = $marker_list_name[$marker_id]; + $allele = $marker_list_allele[$marker_id]; + + $lookup = array( + 'AA' => 1, + 'BB' => -1, + '--' => 'NA', + 'AB' => 0, + 'BA' => 0, + '' => 'NA' + ); + + $sql = "select A_allele, B_allele, mim.chromosome, mim.start_position from markers, markers_in_maps as mim, map, mapset where markers.marker_uid = $marker_id + AND mim.marker_uid = markers.marker_uid + AND mim.map_uid = map.map_uid + AND map.mapset_uid = mapset.mapset_uid + AND mapset.mapset_uid = $selected_map"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + if ($row = mysqli_fetch_array($res)) { + $chrom = $row[2]; + if (preg_match('/[0-9]+/',$chrom, $match)) { + $pos = 100 * $row[3]; + } else { + $chrom = 0; + $pos = $pos_index; + $pos_index += 10; + } + } else { + $chrom = 0; + $pos = $pos_index; + $pos_index += 10; + } + $outarray2 = array(); + $sql = "select marker_name, alleles from 
allele_bymarker where marker_uid = $marker_id"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + if ($row = mysqli_fetch_array($res)) { + $alleles = $row[1]; + $outarray = explode(',',$alleles); + foreach ($outarray as $key=>$allele) { + $line_id = $line_list[$key]; + if (isset($line_lookup[$line_id])) { + $outarray2[]=$lookup[$allele]; + } + } + $allele_str = implode("\t",$outarray2); + $output .= "$marker_name\t$allele\t$chrom\t$pos"; + $output .= "\t$allele_str\n"; + } else { + echo "Error - could not find marker_uid $marker_id
\n"; + } + } + return $outputheader."\n".$output; + } + + /** + * Build the genotype conflicts file for a given set of lines and markers. + * When no marker list is supplied, the markers are looked up in allele_cache for the given lines. + * @param array $lines line_record_uid values + * @param array $markers marker_uid values, may be empty + * @return string tab delimited text, one header row followed by one row per conflict + */ + function type2_build_conflicts_download($lines,$markers) { + global $mysqli; + + if (count($markers)>0) { + $markers_str = implode(",",$markers); + } else { + $markers_str = ""; + } + if (count($lines)>0) { + $lines_str = implode(",",$lines); + } else { + $lines_str = ""; + } + //get lines and filter to get a list of markers which meet the criteria selected by the user + if (preg_match('/[0-9]/',$markers_str)) { + } else { + //get genotype markers that correspond with the selected lines + $sql_exp = "SELECT DISTINCT marker_uid FROM allele_cache + WHERE + allele_cache.line_record_uid in ($lines_str)"; + // report failures through mysqli_error; the ext/mysql mysql_error() function no longer exists and never accepted a mysqli link + $res = mysqli_query($mysqli, $sql_exp) or die(mysqli_error($mysqli)); + if (mysqli_num_rows($res)>0) { + while ($row = mysqli_fetch_array($res)){ + $markers[] = $row["marker_uid"]; + } + } + $markers_str = implode(',',$markers); + } + $output = "line name\tmarker name\talleles\texperiment\n"; + $query = "select l.line_record_name, m.marker_name, a.alleles, e.trial_code + from allele_conflicts a, line_records l, markers m, experiments e + where a.line_record_uid = l.line_record_uid + and a.marker_uid = m.marker_uid + and a.experiment_uid = e.experiment_uid + and a.alleles != '--' + and a.line_record_uid IN ($lines_str) + and a.marker_uid IN ($markers_str) + order by l.line_record_name, m.marker_name, e.trial_code"; + $res = mysqli_query($mysqli, $query) or die(mysqli_error($mysqli)); + if (mysqli_num_rows($res)>0) { + while ($row = mysqli_fetch_row($res)){ + $output.= "$row[0]\t$row[1]\t$row[2]\t$row[3]\n"; + } + } + return $output; + } + + /** + * create map file in Tassel V2 format + * @param string $experiments + * @return string + */ + function type1_build_annotated_align($experiments) + { + global $mysqli; + $delimiter ="\t"; + $output = ''; + 
$doneheader = false; + // Default used when no 'mm' request parameter is given; previously the variable was unset and the marker filter compared against null. + // NOTE(review): 99.9 is a permissive default chosen to pair with the 0.01 min MAF default below - confirm the intended value. + $max_missing = 99.9;//IN PERCENT + if (isset($_GET['mm']) && !empty($_GET['mm']) && is_numeric($_GET['mm'])) + $max_missing = $_GET['mm']; + if ($max_missing>100) + $max_missing = 100; + elseif ($max_missing<0) + $max_missing = 0; + // $firephp->log("in sort markers2"); + $min_maf = 0.01;//IN PERCENT + if (isset($_GET['mmaf']) && !is_null($_GET['mmaf']) && is_numeric($_GET['mmaf'])) + $min_maf = $_GET['mmaf']; + if ($min_maf>100) + $min_maf = 100; + elseif ($min_maf<0) + $min_maf = 0; + // $firephp->log("in sort markers".$max_missing." ".$min_maf); + + //get lines and filter to get a list of markers which meet the criteria selected by the user + $sql_mstat = "SELECT af.marker_uid as marker, m.marker_name as name, SUM(af.aa_cnt) as sumaa, SUM(af.missing)as summis, SUM(af.bb_cnt) as sumbb, + SUM(af.total) as total, SUM(af.ab_cnt) AS sumab + FROM allele_frequencies AS af, markers as m + WHERE m.marker_uid = af.marker_uid + AND af.experiment_uid in ($experiments) + group by af.marker_uid"; + + $res = mysqli_query($mysqli, $sql_mstat) or die(mysqli_error($mysqli)); + $num_maf = $num_miss = 0; + // initialise the accumulators filled by the loop so that count() and implode() further down receive arrays even when no marker passes the filter + $outputheader = ''; + $marker_names = array(); + $marker_uid = array(); + + while ($row = mysqli_fetch_array($res)){ + $maf = round(100*min((2*$row["sumaa"]+$row["sumab"])/(2*$row["total"]),($row["sumab"]+2*$row["sumbb"])/(2*$row["total"])),1); + $miss = round(100*$row["summis"]/$row["total"],1); + if (($maf >= $min_maf)AND ($miss<=$max_missing)) { + $marker_names[] = $row["name"]; + $outputheader .= $delimiter.$row["name"]; + $marker_uid[] = $row["marker"]; + + } + } + // $firephp->log($marker_uid); + + $lookup = array( + 'AA' => 'A','BB' => 'B','--' => '-','AB' => 'C' + ); + $lookup_chrom = array( + '1H' => '1','2H' => '2','3H' => '3','4H' => '4','5H' => '5', + '6H' => '6','7H' => '7','UNK' => '10' + ); + + // finish writing file header using a list of line names + $sql = "SELECT DISTINCT lr.line_record_name AS line_name + FROM line_records AS lr, tht_base AS tb + WHERE + lr.line_record_uid = tb.line_record_uid + AND tb.experiment_uid IN ($experiments) + ORDER 
BY line_name"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + while ($row = mysqli_fetch_array($res)) { + $line_names[] = $row['line_name']; + } + + // make an empty marker with the lines as array keys + $nelem = count($marker_uid); + $n_lines = count($line_names); + $empty = array_combine($line_names,array_fill(0,$n_lines,'-')); + $nemp = count($empty); + $marker_uid = implode(",",$marker_uid); + $line_str = implode($delimiter,$line_names); + // $firephp = log($nelem." ".$n_lines); + + // write output file header + $outputheader = "\n".$delimiter."Yes\n"; + $outputheader .= "".$delimiter.$n_lines."\n"; + $outputheader .= "".$delimiter.$nelem."\n"; + $outputheader .= "".$delimiter."Catagorical\n"; + $outputheader .= "".$delimiter."No\n"; + $outputheader .= "".$delimiter.$line_str."\n"; + $outputheader .= "".$delimiter."".$delimiter."".$delimiter."\n"; + // $firephp = log($outputheader); + + // get marker map data, line and marker names; use latest consensus map + // as the map default + $mapset = 1; + $sql = "SELECT mim.chromosome, mim.start_position, lr.line_record_name as lname, m.marker_name AS mname, + CONCAT(a.allele_1,a.allele_2) AS value + FROM + markers as m, + markers_in_maps as mim, + map, + mapset, + line_records as lr, + alleles as a, + tht_base as tb, + genotyping_data as gd + WHERE + a.genotyping_data_uid = gd.genotyping_data_uid + AND mim.marker_uid = m.marker_uid + AND m.marker_uid = gd.marker_uid + AND gd.marker_uid IN ($marker_uid) + AND mim.map_uid = map.map_uid + AND map.mapset_uid = mapset.mapset_uid + AND mapset.mapset_uid = '$mapset' + AND tb.line_record_uid = lr.line_record_uid + AND gd.tht_base_uid = tb.tht_base_uid + AND tb.experiment_uid IN ($experiments) + ORDER BY mim.chromosome,mim.start_position, m.marker_uid, lname"; + + + $last_marker = "somemarkername"; + $res = mysqli_query($mysqli, $sql) or die(mysqli_error($mysqli)); + + $outarray = $empty; + $cnt = $num_markers = 0; + while ($row = 
mysqli_fetch_array($res)){ + // mysqli_fetch_array() takes the result set as its first argument, not the connection + //first time through loop + if ($cnt==0) { + $last_marker = $row['mname']; + $pos = $row['start_position']; + $chrom = $lookup_chrom[$row['chromosome']]; + } + + if ($last_marker != $row['mname']){ + // Close out the last marker + $output .= "$chrom\t$pos\t$last_marker\t"; + $outarray = implode("",$outarray); + $output .= $outarray."\n"; + //reset output arrays for the next line + $outarray = $empty; + $lname = $row['lname']; //start new line + $outarray[$lname] = $lookup[$row['value']]; + $last_marker = $row['mname']; + $pos = $row['start_position']; + $chrom = $lookup_chrom[$row['chromosome']]; + $num_markers++; + } else { + $lname = $row['lname']; + $outarray[$lname] = $lookup[$row['value']]; + } + $cnt++; + } + + //save data from the last line + $output .= "$chrom\t$pos\t$last_marker\t"; + $outarray = implode("",$outarray); + $output .= $outarray."\n"; + $num_markers++; + + return $outputheader.$output; + } +}// end class