diff --git a/FBbt-GO_routine/FBbt-GO_routine.sh b/FBbt-GO_routine/FBbt-GO_routine.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6d5438ec5cca242f80e9f6f020e5a7431a3afa9d
--- /dev/null
+++ b/FBbt-GO_routine/FBbt-GO_routine.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+#The file FBbt-GO.obo contains all terms from fbbt-simple.obo and all terms that are subclasses of GO:0005575 'cellular component' from GO. It is for use in canto to provide a list of valid anatomy terms for phenotype curation at FlyBase and is (intentionally) missing some classification on the GO terms.
+#Most of the header comes from fbbt-simple.obo.
+
+#A new FBbt-GO.obo can be generated by running this script from the directory that contains both ./import_export/ and ./FBbt-GO_routine/.
+#You must have ROBOT installed - see http://robot.obolibrary.org/
+
+# Abort on the first failing command or unset variable, so that a failed cp/cd/robot step is never followed by the rm, 'cd ..' and mv steps running in the wrong place.
+set -eu
+
+## Copy the two ontologies to merge from ./import_export/ to ./FBbt-GO_routine/
+cp ./import_export/go-basic.obo ./FBbt-GO_routine/
+cp ./import_export/fly_anatomy.obo ./FBbt-GO_routine/
+
+cd ./FBbt-GO_routine || exit 1
+
+## Extract everything that is a subclass of 'GO: cellular component'
+robot extract --method MIREOT \
+  --input go-basic.obo \
+  --branch-from-term http://purl.obolibrary.org/obo/GO_0005575 \
+  --output GO_cellcomponent_module_mireot.owl
+
+## Get all labels in each ontology
+robot query --input GO_cellcomponent_module_mireot.owl \
+  --query get_labels.sparql GO_labels.csv
+robot query --input fly_anatomy.obo \
+  --query get_labels.sparql fbbt_labels.csv
+
+## Make a list of fbbt IDs that have same labels as GO terms and remove these from fbbt-simple
+python duplicate_label_id_finder.py
+robot remove --input fly_anatomy.obo \
+  --term-file duplicate_terms.txt \
+  --select classes \
+  --output fbbt-no-GO.obo
+
+## Merge this module into fbbt-simple.obo and annotate with github location
+robot merge --input fbbt-no-GO.obo \
+  --input GO_cellcomponent_module_mireot.owl \
+  annotate --ontology-iri "https://github.com/FlyBase/drosophila-anatomy-developmental-ontology/for_canto/fbbt-GO.obo" \
+  --output pre-FBbt-GO.obo
+
+## Delete all lines "namespace: cellular_component"
+grep -v "namespace: cellular_component" pre-FBbt-GO.obo > FBbt-GO.obo
+
+## Cleanup - Delete the two original ontologies, GO module, term lists and temp files
+rm -f -- go-basic.obo fly_anatomy.obo GO_cellcomponent_module_mireot.owl GO_labels.csv fbbt_labels.csv duplicate_terms.txt fbbt-no-GO.obo pre-FBbt-GO.obo
+
+cd ..
+
+## Move the merged ontology to ./import_export/
+mv ./FBbt-GO_routine/FBbt-GO.obo ./import_export/
\ No newline at end of file
diff --git a/FBbt-GO_routine/duplicate_label_id_finder.py b/FBbt-GO_routine/duplicate_label_id_finder.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3dc7b03d38e24e6c50eb319b35a126724d6ad89
--- /dev/null
+++ b/FBbt-GO_routine/duplicate_label_id_finder.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+"""Write duplicate_terms.txt: the FBbt IDs whose label is also used by a GO term.
+
+Reads GO_labels.csv and fbbt_labels.csv (columns 'id' and 'label') from the
+current directory and writes the matching FBbt IDs, sorted, one per line.
+"""
+
+import csv
+import sys
+from collections import OrderedDict
+
+
+def csv2dict(filename):
+    """Return an OrderedDict mapping each row's 'id' column to its 'label' column."""
+    # The csv module needs a binary file on Python 2 but a text file opened
+    # with newline='' on Python 3, where a binary-mode file makes it fail.
+    if sys.version_info[0] >= 3:
+        handle = open(filename, newline='', encoding='utf-8')
+    else:
+        handle = open(filename, 'rb')
+
+    csvdict = OrderedDict()
+    with handle:
+        for row in csv.DictReader(handle):
+            csvdict[row['id']] = row['label']
+
+    return csvdict
+
+
+GO_dict = csv2dict('GO_labels.csv')
+fbbt_dict = csv2dict('fbbt_labels.csv')
+
+# A set gives constant-time membership tests instead of rescanning all GO labels for every FBbt term.
+GO_labels = set(GO_dict.values())
+
+fbbt_to_drop = [fbbt for fbbt, label in fbbt_dict.items() if label in GO_labels]
+fbbt_to_drop = [fbbt.replace('http://purl.obolibrary.org/obo/FBbt_', 'FBbt:') for fbbt in fbbt_to_drop]
+fbbt_to_drop = sorted(fbbt_to_drop)
+
+with open('duplicate_terms.txt', 'w') as f:
+    for item in fbbt_to_drop:
+        f.write("%s\n" % item)
diff --git a/FBbt-GO_routine/get_labels.sparql b/FBbt-GO_routine/get_labels.sparql
new file mode 100644
index 0000000000000000000000000000000000000000..0f97739a1db17377694d1a4686b70d078f3cf9d8
--- /dev/null
+++ b/FBbt-GO_routine/get_labels.sparql
@@ -0,0 +1,9 @@
+prefix owl: <http://www.w3.org/2002/07/owl#>
+prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT ?id ?label
+WHERE
+{
+  ?id a owl:Class .
+  ?id rdfs:label ?label
+}
\ No newline at end of file
diff --git a/README.md b/README.md
deleted file mode 100644
index 724903d4b4d5a46d477119f1f5f4b22c9a8a34fe..0000000000000000000000000000000000000000
--- a/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# PDN Canto Config
-
-Configuration files for Canto to support PDN
\ No newline at end of file
diff --git a/canto-docker-initd b/canto-docker-initd
new file mode 100644
index 0000000000000000000000000000000000000000..41099046d0a7ccfac710a1267f57fdcf8d21a16c
--- /dev/null
+++ b/canto-docker-initd
@@ -0,0 +1,58 @@
+#! /bin/sh
+# /etc/init.d/canto
+#
+# Start, stop or restart the Canto server through canto_docker / docker exec.
+# Usage: canto-docker-initd {start|restart|stop} [port]
+# The port is only read by 'start', which refuses to run without it.
+
+# Installation instructions: https://github.com/pombase/canto/blob/master/etc/canto-init.d.md
+
+action=${1:-}
+port=${2:-}
+
+WORKERS=5
+CANTO_SPACE='/var/canto-space/'
+
+PID_PATH=import_export/canto.pid
+
+# Print the PID stored in the pid file; fail with a message on stderr
+# when that file is missing or empty, so kill is never run without a PID.
+read_pid() {
+  pid_file="$CANTO_SPACE/$PID_PATH"
+  if [ ! -s "$pid_file" ]; then
+    echo "No PID found in $pid_file" >&2
+    return 1
+  fi
+  /bin/cat "$pid_file"
+}
+
+# Carry out specific functions when asked to by the system
+case "$action" in
+  start)
+    if [ -z "$port" ]; then
+      echo "Usage: $0 start <port>" >&2
+      exit 1
+    fi
+
+    echo "Starting Canto with $WORKERS workers"
+
+    # '&&' after cd: canto_docker must never be launched from the wrong directory.
+    (date; cd "$CANTO_SPACE" && canto/script/canto_docker --non-interactive start_server --pid-file="/$PID_PATH" --port "$port" -- script/canto_start --workers "$WORKERS" --keepalive-timeout 5 -s Starman --preload) >> canto.log 2>&1 &
+    ;;
+  stop)
+    pid=$(read_pid) || exit 1
+    echo "stopping $pid"
+    (cd "$CANTO_SPACE" && docker exec canto kill -TERM "$pid") || exit 1
+    ;;
+  restart)
+    pid=$(read_pid) || exit 1
+    echo "restarting $pid"
+    (cd "$CANTO_SPACE" && docker exec canto kill -HUP "$pid") || exit 1
+    ;;
+  *)
+    echo "Usage: $0 {start|restart|stop}"
+    exit 1
+    ;;
+esac
+
+exit 0
\ No newline at end of file
diff --git a/canto_deploy.yaml b/canto_deploy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7525d7d87e952d7da5f7853e947d436573808f9
--- /dev/null
+++ b/canto_deploy.yaml
@@ -0,0 +1,344 @@
+---
+# Canto config file -*- conf -*-
+name: Fly-Canto
+long_name: FlyBase-pheno-test-local
+
+
+instance_organism:
+  taxonid: 7227
+
+flybase_mode: 1
+
+Model::TrackModel:
+  connect_info:
+# filepath plus track.sqlite3
+    - 
dbi:SQLite:dbname=/data/track.sqlite3 + schema_class: Canto::TrackDB +# filepath +data_directory: data + +#Database to connect to - 'connect_info:' bit is the only part that needs changes on a weekly basis +Model::ChadoModel: + connect_info: + - "dbi:Pg:dbname=flybase; host=deneb.pdn.cam.ac.uk" + - flybase + - flybase + schema_class: Canto::ChadoDB + +#This is required to enable the Chado database connection +chado: + taxon_id_lookup_strategy: dbxref + ignore_case_in_gene_query: 0 +#This is also required to enable the Chado database connection +implementation_classes: + gene_adaptor: Canto::Chado::GeneLookup + go_adaptor: Canto::Track::OntologyLookup + ontology_adaptor: Canto::Track::OntologyLookup + + +# configuration of alleles +## trying to remove fields for strain and allele expression - did not seem to work on initial test +alleles_have_expression: 0 +strains_mode: 0 +split_genotypes_by_organism: 0 +show_genotype_management_genes_list: 0 +diploid_mode: 1 + +## allow internal notes on single alleles and config: +notes_on_single_allele_genotypes_only: 1 +allele_note_types: + - name: phenotype + display_name: Phenotype + - name: genetic_interaction + display_name: Genetic interaction + - name: xenogenetic_interaction + display_name: Xenogenetic interaction + - name: rescue_complementation + display_name: Rescue/Complementation + - name: internal_note + display_name: Internal note + +## display wildtype 'alleles' simply as '+' +wildtype_name_template: '@@gene_display_name@@[+]' + +## configuration of allele types +allele_type_list: + - name: other + export_type: other + show_description: true + description_required: false + allele_name_required: false + allow_expression_change: true + expression_required: true + hide_type_name: true + placeholder: Please provide a description + - name: accessory + export_type: accessory + show_description: true + description_required: false + allele_name_required: false + allow_expression_change: true + expression_required: true + 
do_not_annotate: 1 + placeholder: Please provide a description + - name: aberration + export_type: aberration + show_description: true + description_required: false + allele_name_required: false + allow_expression_change: false + expression_required: false + + +# the name of the organisation running this Canto - shown in various places in the user interface +database_name: Fly-Canto + +database_url: www.flybase.org + +instance_front_subtitle: Canto for FlyBase + +available_annotation_type_list: + - name: phenotypic_class + category: ontology + namespace: phenotypic_class + hide_extension_relations: [qual,dv_qual] + very_short_display_name: 'phen-class' + short_display_name: 'phen-class' + display_name: 'phenotype class' + synonyms_to_display: + - exact + feature_type: 'genotype' + can_have_conditions: 0 + broad_term_suggestions: > + lethal, sterile, visible, cell number defective, locomotor behavior defective + specific_term_examples: > + increased cell size, small body + help_text: > + Annotate abnormal phenotypes of organims or cells with this genotype. + more_help_text: > + This a basic discription of a phenotype, i.e. any observable characteristic or trait of an organism; + observable characteristics include morphology, development, behaviour + extra_help_text: > + Alleles may be annotated with multiple phenotype terms. 
+ detailed_help_path: /docs/fypo_annotation + - name: anatomy + category: ontology + namespace: fly_anatomy.ontology + hide_extension_relations: [qual,dv_qual] + very_short_display_name: 'anatomy' + short_display_name: 'anatomy' + display_name: 'phenotype anatomy' + synonyms_to_display: + - exact + feature_type: 'genotype' + can_have_conditions: 0 + broad_term_suggestions: > + embryo, wing, eye, ovary, wing disc, neuromuscular junction + specific_term_examples: > + NMJ bouton, oocyte, photoreceptor cell of the eye + help_text: > + Annotate the anatomy (organ, structure, cell or subcellular structure) where abnormal phenotype occurs in organims or cells with this genotype. + more_help_text: > + A phenotype is any observable characteristic or trait of an organism; + observable characteristics include morphology, development, behaviour + extra_help_text: > + Alleles may be annotated with multiple phenotype terms + detailed_help_path: /docs/fypo_annotation + - name: phenotype_interaction + category: interaction + namespace: phenotypic_class + term_suggestions_annotation_type: phenotypic_class + interaction_term_required: 1 + hide_extension_relations: [qual,dv_qual] + display_name: 'phenotype class interaction' + very_short_display_name: 'phen-class interaction' + short_display_name: 'phen-class interaction' + feature_type: 'metagenotype' + help_text: 'Examples: suppressible, non-suppressible, enhanceable, non-enhanceable, non-modified.' 
+ evidence_codes: + - suppressible + - "suppressible | partially" + - enhanceable + - non-suppressible + - non-enhanceable + - non-modified + detailed_help_path: /docs/genetic_interaction_annotation + - name: anatomy_interaction + category: interaction + namespace: fly_anatomy.ontology + term_suggestions_annotation_type: anatomy + interaction_term_required: 1 + hide_extension_relations: [qual,dv_qual] + display_name: 'phenotype anatomy interaction' + very_short_display_name: 'anatomy interaction' + short_display_name: 'anatomy interaction' + feature_type: 'metagenotype' + help_text: 'Examples: suppressible, non-suppressible, enhanceable, non-enhanceable, non-modified.' + evidence_codes: + - suppressible + - "suppressible | partially" + - enhanceable + - non-suppressible + - non-enhanceable + - non-modified + detailed_help_path: /docs/genetic_interaction_annotation + - name: complementation + can_have_conditions: 0 + single_allele_only: 1 + interaction_same_locus: 1 + category: interaction + display_name: 'complementation' + feature_type: 'metagenotype' + help_text: 'Examples: Complements, Partially complements, Fails to complement' + evidence_codes: + - Complements + - Partially complements + - Fails to complement + - name: rescue + can_have_conditions: 0 + single_locus_only: 1 + interaction_same_locus: 1 + category: interaction + display_name: 'rescue' + feature_type: 'metagenotype' + help_text: 'Examples: Rescues, Partially Rescues, Fails to Rescue' + evidence_codes: + - Rescues + - Partially rescues + - Fails to rescue + + +# restrict the list of annotation types +enabled_annotation_type_list: + - phenotypic_class + - anatomy + - phenotype_interaction + - anatomy_interaction + - complementation + - rescue + + +ontology_namespace_config: + subsets_to_ignore: + primary_autocomplete: + - "is_a(Grouping_terms)" + primary_select: + - "is_a(Grouping_terms)" + - "is_a(qc_do_not_annotate)" + extension: +# - add the ash to allow top/root terms (while still disallowing 
'do not annotate' terms): + # - "is_a(canto_root_subset)" + - "is_a(qc_do_not_annotate)" + do_not_annotate_subsets: + - "is_a(canto_root_subset)" + - "is_a(qc_do_not_annotate)" + + +# configuration for annotation extensions in the user interface +# See: https://github.com/pombase/canto/wiki/AnnotationExtensionConfig +extension_conf_files: + - extension_config.tsv + + +# to show the gene list on the genotype management gene list, un-comment the next three lines - i.e. remove the #. The spaces before "taxonid:" are needed +#show_genotype_management_genes_list: 1 +#instance_organism: +# taxonid: 7227 + + +cache: + memcached: + servers: + - 127.0.0.1:11211 + + +#which interaction evidence codes can be used with which phenotype classes +namespace_term_evidence_codes: + phenotypic_class: + # viable (minus partially lethal - majority live) + - constraint: "is_a(FBcv:0000349)-is_a(FBcv:0000350)" + evidence_codes: [] + # increased mortality during development (except lethal) + - constraint: "is_a(FBcv:0002019)-is_a(FBcv:0000351)" + evidence_codes: + - suppressible + - "suppressible | partially" + - enhanceable + - non-suppressible + - non-enhanceable + - non-modified + # lethal (but not the more specific child terms) + - constraint: "is_a(FBcv:0000351)-is_a(FBcv:0002000)" + evidence_codes: + - suppressible + - non-suppressible + # lethal terms detailed with developmental stage + - constraint: "is_a(FBcv:0002000)" + evidence_codes: + - suppressible + - non-suppressible + - non-enhanceable + - non-modified + # partially lethal + - constraint: "is_a(FBcv:0002015)" + evidence_codes: + - suppressible + - "suppressible | partially" + - enhanceable + - non-suppressible + - non-enhanceable + - non-modified + # some die during embryonic stage + - constraint: "is_a(FBcv:0002041)" + evidence_codes: + - suppressible + - non-suppressible + - non-enhanceable + - non-modified + # some die during larval stage + - constraint: "is_a(FBcv:0002023)" + evidence_codes: + - suppressible + - 
non-suppressible + - non-enhanceable + - non-modified + # some die during P-stage + - constraint: "is_a(FBcv:0002020)" + evidence_codes: + - suppressible + - non-suppressible + - non-enhanceable + - non-modified + # some die during immature adult stage + - constraint: "is_a(FBcv:0002051)" + evidence_codes: + - suppressible + - non-suppressible + - non-enhanceable + - non-modified + # fertile (minus semi-fertile) + - constraint: "is_a(FBcv:0000374)-is_a(FBcv:0000375)" + evidence_codes: [] + # semi-fertile + - constraint: "is_a(FBcv:0000375)" + evidence_codes: + - suppressible + - "suppressible | partially" + - enhanceable + - non-suppressible + - non-enhanceable + - non-modified + # semi-sterile + - constraint: "is_a(FBcv:0000365)" + evidence_codes: + - suppressible + - "suppressible | partially" + - enhanceable + - non-suppressible + - non-enhanceable + - non-modified + # sterile + - constraint: "is_a(FBcv:0000364)" + evidence_codes: + - suppressible + - non-suppressible diff --git a/canto_for_etc-initd b/canto_for_etc-initd new file mode 100644 index 0000000000000000000000000000000000000000..18808ce47ccb410beb718aef4d0d8857963ffcd2 --- /dev/null +++ b/canto_for_etc-initd @@ -0,0 +1,15 @@ +#!/bin/sh - +# +### BEGIN INIT INFO +# Provides: canto +# Required-Start: $remote_fs $syslog +# Required-Stop: $remote_fs $syslog +# Should-Start: +# Should-Stop: +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Canto +### END INIT INFO +# + +su -c "/sbin/canto-docker-initd $* 7000" root & \ No newline at end of file diff --git a/extension_config-Rscript.RData b/extension_config-Rscript.RData new file mode 100644 index 0000000000000000000000000000000000000000..97cf907e759aaecc1bfb02783ea5947ae82e345b Binary files /dev/null and b/extension_config-Rscript.RData differ diff --git a/extension_config-Rscript/.Rhistory b/extension_config-Rscript/.Rhistory new file mode 100644 index 0000000000000000000000000000000000000000..3c6ea6396d8b553858a7a5fdfcd76e40b594cc4d 
--- /dev/null +++ b/extension_config-Rscript/.Rhistory @@ -0,0 +1,30 @@ +source('~/Desktop/homework/canto/extension_config-Rscript/test-script2.R') +obo1 +ids +obo1 +source('~/Desktop/homework/canto/extension_config-Rscript/test-script2.R') +head(Df) +source('~/Desktop/homework/canto/extension_config-Rscript/test-script3.R') +length(phen) +phen[1] +phen +source('~/Desktop/homework/canto/extension_config-Rscript/test-script3.R') +qualist[3,1] +quallist[3,1] +count +source('~/Desktop/homework/canto/Scripts/extension_config-Rscript/extension_config copy.R') +count +head(Df) +phen +m +quallist +source('~/Desktop/homework/canto/Scripts/extension_config-Rscript/extension_config copy.R') +source('~/Desktop/homework/canto/Scripts/extension_config-Rscript/extension_config copy.R') +source('~/Desktop/homework/canto/Scripts/extension_config-Rscript/extension_config copy.R') +source('~/Desktop/homework/canto/Scripts/extension_config-Rscript/extension_config copy.R') +Df1 +source('~/Desktop/homework/canto/Scripts/4extension_config-Rscript/list-to-extension_config.R') +setwd("./") +pwd +setwd("./") +getwd() diff --git a/extension_config-Rscript/allowed_qualifiers.tsv b/extension_config-Rscript/allowed_qualifiers.tsv new file mode 100644 index 0000000000000000000000000000000000000000..53401d1a2fe4738c26bc015ea631ece01504a41f --- /dev/null +++ b/extension_config-Rscript/allowed_qualifiers.tsv @@ -0,0 +1,7 @@ +subtree annotation_type qualifier +FBcv:0000347 phenotype genotype_to_phenotype_relation,environmental_qualifier,sex_qualifier,clone_qualifier,dominance_qualifier,intensity_qualifier, +FBbt:10000000 phenotype genotype_to_phenotype_relation,environmental_qualifier,sex_qualifier,clone_qualifier,spatial_qualifier,structural_qualifier +GO:0005575 phenotype genotype_to_phenotype_relation,environmental_qualifier,sex_qualifier,clone_qualifier,spatial_qualifier,structural_qualifier +FBcv:0000347 genetic interaction genotype_to_phenotype_relation,sex_qualifier,clone_qualifier, 
+FBbt:10000000	genetic interaction	genotype_to_phenotype_relation,sex_qualifier,clone_qualifier,
+GO:0005575	genetic interaction	genotype_to_phenotype_relation,sex_qualifier,clone_qualifier,
\ No newline at end of file
diff --git a/extension_config-Rscript/list-to-extension_config.R b/extension_config-Rscript/list-to-extension_config.R
new file mode 100644
index 0000000000000000000000000000000000000000..b8e780025a537d3ef022c0987786307d0eba8894
--- /dev/null
+++ b/extension_config-Rscript/list-to-extension_config.R
@@ -0,0 +1,167 @@
+# This Rscript creates an extension_config.tsv file that defines which qualifiers (extensions in pombase lingo) can go with which type of annotation, specified in 'allowed_qualifiers.tsv'
+# The list of allowed qualifiers comes as an array of namespaces
+# The annotation types are defined as the top term IDs for the type of annotation
+  # phenotypic class - FBcv:0000347
+  # anatomy/manifests uses two ontologies and thus needs two ids:
+    # flybase anatomy - FBbt:10000000
+    # GO CC - GO:0005575.
+# extension_config.tsv only uses IDs and, therefore, qualifier namespaces must be converted into their top term IDs, which requires screening the FBcv obo file.
+
+#set work directory as canto-space
+setwd("./")
+getwd()
+
+# 1 - pick the default namespaces from the FBcv ontology
+##tempFBcvobo.txt is a derivation of the original FBcv obo file, stripped of text, etc, and with ':' replaced by '\t' so that R can read it as a 2 column table
+obo<-read.table("./extension_config-Rscript/tempFBcvobo.txt", sep='\t', fill=TRUE, quote = "")
+obo[,1]<-as.character(obo[,1])
+obo[,2]<-as.character(obo[,2])
+ids<-which(obo[,1]=="id")
+idl<-length(ids)
+a<-0
+
+#Create a working table for term-term relationships, etc
+Df<-data.frame(matrix(ncol=6), stringsAsFactors = FALSE)
+colnames(Df)<-c("cv term","name","namespace","parent term","top namespace", "topmost namespace?")
+
+#loop to remove obsolete terms, and move CV, name, namespace, and relation into separate columns
+#every stanza, including the last one, runs from its own 'id' row to the row before the next 'id' row (or to the end of the table)
+for (i in seq_len(idl)) {
+  first<-ids[i]
+  last<-if (i<idl) ids[i+1]-1 else nrow(obo)
+  obo1<-obo[first:last,]
+  #both conditions are combined with '&': a second positional argument of which() is 'arr.ind', not a filter
+  obsolete<-which(obo1[,1]=="is_obsolete" & obo1[,2]=="true")
+  if (length(obsolete)==0){
+    a<-a+1
+    Df[a,1]<-obo[first,2]
+    Df[a,2]<-obo[first+1,2]
+    b<-which(obo1[,1]=="namespace")
+    if (length(b)==1){
+      Df[a,3]<-obo1[b,2]
+    }
+    #only terms with exactly one is_a line get a parent term
+    c<-which(obo1[,1]=="is_a")
+    if (length(c)==1){
+      Df[a,4]<-obo1[c,2]
+    }
+  }
+}
+
+#for the is_a field, this will crop the CV term (e.g. FBcv:0000001 -> 4 letters+colon+7 numbers, adding up to 12 characters)
+Df[,4]<-substr(Df[,4], 1, 12)
+
+#Loop to pick the top-most namespace, using the is_a parent-child relationships
+## 1 - set top namespace
+Df[which(Df[,1]=="FBcv:0000000"),3]<-"FlyBase_miscellaneous_CV"
+## 2 - if, existing, pick the current namespace
+for(j in 1:nrow(Df)){
+  if(is.na(Df[j,3])==FALSE){Df[j,5]<-Df[j,3]}
+}
+## 3 - otherwise, go through all childs of a term and, if child does not have a namespace, pick the parent namespace
+rm(j)
+for(j in 1:nrow(Df)){
+  childs<-which(Df[,4]==Df[j,1])
+  if(length(childs)>0){
+    for(k in 1:length(childs)){
+      if(is.na(Df[childs[k],5])==FALSE){}
+      else{if(is.na(Df[j,3])==FALSE){
+        Df[childs[k],5]<-Df[j,3]}
+        else{
+          if(is.na(Df[j,5])==FALSE){Df[childs[k],5]<-Df[j,5]}
+        }
+      }
+    }
+    rm(k)
+    for(k in 1:length(childs)){
+      if(is.na(Df[childs[k],5])==FALSE&is.na(Df[j,3])==FALSE&Df[childs[k],5]==Df[j,3]){
+        Df[childs[k],6]<-"child"
+      }
+    }
+  }
+}
+
+
+# create the template for the 'extension_config.tsv' file
+Df1<-data.frame(matrix(ncol=8), stringsAsFactors = FALSE)
+colnames(Df1)<-c("domain ID", "subset relation", "extension relation", "range ID", "Canto display text", "Help text", "cardinality", "role")
+
+#read file with list of allowed qualifiers (on "allowed_qualifiers.tsv")
+quallist<-read.table("./extension_config-Rscript/allowed_qualifiers.tsv", sep="\t", header=TRUE, stringsAsFactors=FALSE)
+
+count<-1
+#for each type of allowed qualifier, replace the namespace with corresponding top term with that namespace
+for(m in 1:nrow(quallist)){
+  phen<-unlist(strsplit(quallist[m,3],","))
+  for(l in 1:length(phen)){
+    top<-Df[which(Df[,3]==phen[l]&Df[,5]==phen[l]&is.na(Df[,6])==TRUE),1]
+    #fail with a readable message instead of R's "replacement has length zero" when a namespace has no top term
+    if(length(top)==0){stop(paste("no top term found for qualifier namespace", phen[l]))}
+    phen[l]<-top[1]
+  }
+
+  if(quallist[m,2]=='phenotype'){
+    #if the annotations are for 'phenotypic class' (FBcv:0000347) then:
+    # a) the single term 'progressive' should be allowed
+    if(quallist[m,1]==Df[which(Df[,2]=="phenotypic class"),1]){
+      Df1[count,1]<-quallist[m,1]
+      phen<-append(phen, Df[which(Df[,2]=="progressive"), 1])
+      Df1[count,4]<-paste(phen,collapse = '|')
+      count<-count+1
+      # b) and the fertility/sterility terms must not allow developmental stage terms
+      ##once the ontology file is updated so that all viability/mortality/life-span terms are under a common parent term, this next bit should be updated
+      viable<-c("viable","increased mortality","long lived")
+      for (n in 1:length(viable)){
+        viable[n]<-Df[which(Df[,2]==viable[n]),1]
+      }
+      #once the ontology file is updated to put term with decreased fertility under a common parent term (e.g. 'decreased fertily'), the next bit should be updated to refer to the new top term
+      sterile<-c("fertile", "sterile","semi-sterile","semi-fertile")
+      for (o in 1:length(sterile)){
+        sterile[o]<-Df[which(Df[,2]==sterile[o]),1]
+      }
+      viablesterile<-c(viable,sterile)
+      Df1[count,1]<-paste(quallist[m,1],"-is_a(",paste(viablesterile, collapse=")&is_a("),")",sep="")
+      Df1[count,4]<-"FBdv:00007008"
+      Df1[count,3]<-"dv_qual"
+      Df1[count,5]<-"Developmental stage qualifier"
+      Df1[count,7]<-"0,1"
+      count<-count+1
+    }
+    else{
+      Df1[count,1]<-quallist[m,1]
+      Df1[count,4]<-paste(phen, collapse = '|')
+      count<-count+1
+      Df1[count,1]<-quallist[m,1]
+      Df1[count,4]<-"FBdv:00007008"
+      Df1[count,3]<-"dv_qual"
+      Df1[count,5]<-"Developmental stage qualifier"
+      Df1[count,7]<-"0,1"
+      count<-count+1
+    }
+  }
+  if(quallist[m,2]=='genetic interaction'){
+    Df1[count,1]<-quallist[m,1]
+    Df1[count,4]<-paste(phen, collapse = '|')
+    Df1[count,3]<-"int_qual"
+    Df1[count,5]<-"Interaction qualifier - ONLY USE FOR INTERACTIONS"
+    count<-count+1
+  }
+}
+
+
+for (j in 1:nrow(Df1)){
+if (is.na(Df1[j,2])==TRUE){Df1[j,2]<-as.character("is_a")}
+  else{}
+if (is.na(Df1[j,3])==TRUE){Df1[j,3]<-as.character("qual")}
+  else{}
+if (is.na(Df1[j,5])==TRUE){Df1[j,5]<-as.character("Qualifier")}
+  else{}
+if (is.na(Df1[j,6])==TRUE){Df1[j,6]<-as.character("")}
+  else{}
+if (is.na(Df1[j,7])==TRUE){Df1[j,7]<-as.character("*")}
+  else{}
+if (is.na(Df1[j,8])==TRUE){Df1[j,8]<-as.character("user")}
+  else{}
+}
+
+write.table(Df1, file = "./canto/extension_config.tsv", sep='\t', quote= FALSE, row.names = FALSE)
\ No newline at end of file
diff --git a/extension_config-Rscript/list-to-extension_config.sh b/extension_config-Rscript/list-to-extension_config.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e2c7a015a37b03ff014645f76e8be0e33372f1df
--- /dev/null
+++ b/extension_config-Rscript/list-to-extension_config.sh
@@ -0,0 +1,19 @@
+#script(s) to configure extensions for Canto from 'flybase_controlled_vocabulary.obo'
+
+# 1 - to eliminate irrelevant text from the obo file.
+sed -n -e '/\[Term\]/,$p' ./import_export/flybase_controlled_vocabulary.obo | perl -pe 's/(^\[Term\])//g' | perl -pe 's/(^\[Typedef\])//g' | perl -pe 's/: /\t/g' > ./extension_config-Rscript/tempFBcvobo.txt
+
+
+# 2 - this Rscript creates an extension_config.tsv file that defines which qualifiers (extensions in pombase lingo) can go with which type of annotation, specified in 'allowed_qualifiers.tsv'
+# The list of allowed qualifiers is an array of namespaces
+# The annotation types are defined as the top term IDs for the type of annotation
+  # phenotypic class - FBcv:0000347
+  # anatomy/manifests uses two ontologies and thus needs two ids:
+    # flybase anatomy - FBbt:10000000
+    # GO CC - GO:0005575.
+# the resulting extension_config.tsv only uses IDs and, therefore, the qualifier namespaces must be converted into topmost IDs of that namespace, which requires full processing of the FBcv obo file.
+# allowed_qualifiers.tsv or test-script3.R may have to be changed if a) the ontologies change top terms, b) mortality- and fertility-related terms are re-organised, or c) the list of qualifiers changes
+
+Rscript ./extension_config-Rscript/list-to-extension_config.R
+
+rm ./extension_config-Rscript/tempFBcvobo.txt
\ No newline at end of file
diff --git a/starting-pack.sh b/starting-pack.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a50ece6396c21a0c165229cf9bcfd5cd907e1442
--- /dev/null
+++ b/starting-pack.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+#before start, add the content of the folder 'starting-pack' to ./canto-space/
+#run this @ /canto-space/
+
+#add canto_deploy.yaml (the configuration file) to ./canto-space/canto/
+##comment for vitor - add username and password for chado @ deneb?
+mv canto_deploy.yaml ./canto/
+
+
+#enable server mode
+
+##comment for James: make sure the CANTO-SPACE variable of canto-docker-initd has the correct filepath
+##create a canto-docker-initd file @ /sbin/ and replace the one @ ./canto/etc/
+##make them executable
+##each step only runs when the previous one succeeded, so the local copy is never removed after a failed cp
+cp canto-docker-initd /sbin/ &&
+  chmod a+x /sbin/canto-docker-initd &&
+  cp canto-docker-initd ./canto/etc/ &&
+  chmod a+x ./canto/etc/canto-docker-initd &&
+  rm canto-docker-initd ||
+  { echo "installing canto-docker-initd failed" >&2; exit 1; }
+
+##create a 'canto' file @ /etc/init.d
+##and make it executable
+mv canto_for_etc-initd /etc/init.d/canto
+chmod a+x /etc/init.d/canto
+
+
+#enable memcached - for caching of the servers
+##comment for vitor: not sure it needs more stuff - ask Kim??
+sudo apt-get install memcached
+
+
+#this loop creates cv terms for all the range of values for the 'priority curation score' within the internal canto database: 0-36
+#a counting loop is used because '{0..36}' is a bash feature that a plain /bin/sh may leave unexpanded,
+#and the command is run directly rather than from a string so that "Canto curation priorities" stays one argument
+i=0
+while [ "$i" -le 36 ]; do
+  sudo ./canto/script/canto_docker ./script/canto_add.pl --cvterm "Canto curation priorities" "$i" "FB:cantoscore$i"
+  i=$((i + 1))
+done
+
+
+#configure the taxon (Drosophila melanogaster)
+#NOTE(review): '[fruit fly]' is quoted so the shell passes it as one literal argument instead of a glob split in two words; confirm whether canto_add.pl expects the brackets at all
+sudo ./canto/script/canto_docker ./script/canto_add.pl --organism "Drosophila melanogaster" 7227 '[fruit fly]'
+
+
+#start canto
+/etc/init.d/canto start
+
+
+#run the weekly routine script which executes all configuration steps
+sh weekly_routine.sh
diff --git a/weekly_routine.sh b/weekly_routine.sh
new file mode 100644
index 0000000000000000000000000000000000000000..592a5a3da5a02c50912157bb303020e9c03f9b38
--- /dev/null
+++ b/weekly_routine.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+#weekly routine on Sunday pm /Monday am. This script should be run from /canto-space/
+
+# copy/update of ontologies from /data/export/curfiles/ontologies/trunk/ into 'canto-space/import_export'
+## gm comment: I don't really know how rsync works so I didn't know which options to choose (so didn't try to!) and the syntax may not be correct, but the first path should be OK if Canto is installed on the current vm.
+## vt comment: the second path should be to /canto-space/import_export/
+
+ONTOLOGY_SOURCE_DIR=/data/export/curfiles/ontologies/trunk
+
+# set to 1 by update_obo_file as soon as one ontology file has been refreshed
+ONTOLOGY_CHANGED=0
+
+# Refresh ./import_export/<file> from the ontology source directory.
+# Arguments: $1 - file name of the ontology, e.g. fly_anatomy.obo
+# Globals:   ONTOLOGY_CHANGED is set to 1 when the file was copied
+# Written as 'name() {' because the 'function' keyword is not understood by every /bin/sh.
+update_obo_file() {
+  FILE_NAME=${1}
+
+  # first, check that the FILE exists
+  if [ ! -e "${ONTOLOGY_SOURCE_DIR}/${FILE_NAME}" ]; then
+    echo "'${FILE_NAME}' does not exist as a FILE, skipping ..."
+    return 0
+  fi
+
+  # cmp -s prints nothing and fails when the two files differ or the local copy is missing
+  if cmp -s "${ONTOLOGY_SOURCE_DIR}/${FILE_NAME}" "./import_export/${FILE_NAME}"; then
+    return 0
+  fi
+
+  echo "Updating ${FILE_NAME} ..."
+  if /usr/bin/rsync "${ONTOLOGY_SOURCE_DIR}/${FILE_NAME}" "./import_export/${FILE_NAME}"; then
+    ONTOLOGY_CHANGED=1
+  else
+    echo "rsync of '${FILE_NAME}' failed" >&2
+  fi
+}
+
+for FILE in "fly_anatomy.obo" "flybase_controlled_vocabulary.obo" "fly_development.obo" "go-basic.obo"; do
+  update_obo_file "${FILE}"
+done
+
+
+#replace merged ontology and reload all ontologies to Canto, but only when at least one ontology file was refreshed above
+if [ "${ONTOLOGY_CHANGED}" -eq 1 ]; then
+
+  # redo/replace merged FBbt-GO.obo ontology, then replace extension_config.tsv, then reload the ontologies and extension configuration
+  # each step only runs when the previous one succeeded
+  # the merged file is loaded as FBbt-GO.obo because that is the name FBbt-GO_routine.sh moves into ./import_export/
+  if ! { sh ./FBbt-GO_routine/FBbt-GO_routine.sh &&
+    sh ./extension_config-Rscript/list-to-extension_config.sh &&
+    sudo ./canto/script/canto_docker ./script/canto_load.pl --process-extension-config --ontology /import_export/FBbt-GO.obo --ontology /import_export/fly_development.obo --ontology /import_export/flybase_controlled_vocabulary.obo; }; then
+    echo "rebuilding or reloading the ontologies failed" >&2
+  fi
+
+fi
+
+
+#update of database name in canto_deploy.yaml
+##vt comment: need lots of help here (as per point 6 in https://docs.google.com/document/d/19C-J8sJmZb_OSluxyzBWJxUkdR_N4sIpgjHI7u5pp0I/edit)
+  ## query SELECT name FROM chado WHERE type='production' AND current='t'; > $
+  ## to replace the value of dbi:Pg:dbname (i.e. 'flybase') in:
+  ## Model::ChadoModel:
+  ##   connect_info:
+  ##     "dbi:Pg:dbname=flybase; host=deneb.pdn.cam.ac.uk"
+
+
+#data import (using Gillian's scripts in the vm - see point 7.d in https://docs.google.com/document/d/19C-J8sJmZb_OSluxyzBWJxUkdR_N4sIpgjHI7u5pp0I/edit)
+### gm comment: the following 'if' command should work
+### run the script to generate new information into canto ONLY if the fbrf_input_list.tsv file exists
+
+if [ -e "./fbrf_input_list.tsv" ] ; then
+
+  # make the json input file, load it into canto, and only then remove fbrf_input_list.tsv so that the script doesn't try to add the same information again next time it is run
+  # a failed step stops the chain and leaves fbrf_input_list.tsv in place for the next run
+  if ! { /usr/bin/perl /data/export/support_scripts/canto_json_input_maker.pl /data/export/support_scripts/modules_server.cfg ./fbrf_input_list.tsv > ./import-fbrfs.json &&
+    sudo ./canto/script/canto_docker ./script/canto_add.pl --sessions-from-json ./import-fbrfs.json vmt25@cam.ac.uk 7227 &&
+    /bin/rm ./fbrf_input_list.tsv; }; then
+    echo "import of fbrf_input_list.tsv failed" >&2
+  fi
+
+fi
+
+
+#reset cache (restart memcached)
+/etc/init.d/memcached restart
+
+#canto restart
+/etc/init.d/canto restart