| Class | Bio::SQL::Sequence |
| In: |
lib/bio/db/biosql/sequence.rb
|
| Parent: | Object |
| entry | [R] |
# File lib/bio/db/biosql/sequence.rb, line 89
# Builds the wrapper either around an existing entry or from a sequence
# object that is handed to to_biosql.
#
# options accepts only the keys :entry, :biodatabase_id and :biosequence
# (checked through assert_valid_keys):
# * :entry - when not nil it is stored in @entry and nothing else happens.
# * :biosequence and :biodatabase_id - when both are not nil they are passed
#   to to_biosql.
# When neither condition holds, @entry is left unset.
def initialize(options={})
  options.assert_valid_keys(:entry, :biodatabase_id, :biosequence)

  given_entry = options[:entry]
  if !given_entry.nil?
    @entry = given_entry
  elsif !options[:biosequence].nil? && !options[:biodatabase_id].nil?
    to_biosql(options[:biosequence], options[:biodatabase_id])
  end
end
Returns the seqfeatures mapped from BioSQL with a type_term like 'CDS'.
# File lib/bio/db/biosql/sequence.rb, line 294
# Delegates to @entry.cdsfeatures and returns whatever the entry returns.
def cdsfeatures
  wrapped = @entry
  wrapped.cdsfeatures
end
# File lib/bio/db/biosql/sequence.rb, line 392
# Stores +value+ as a new Comment attached to @entry and saves it
# immediately with save!. The rank is the entry's current comment count
# plus one.
def comment=(value)
  next_rank = @entry.comments.count.succ
  record = Comment.new(:bioentry=>@entry, :comment_text=>value, :rank=>next_rank)
  record.save!
end
# File lib/bio/db/biosql/sequence.rb, line 362
# Returns the comment_text of every comment attached to @entry, in the
# order the entry yields them.
def comments
  @entry.comments.map(&:comment_text)
end
# File lib/bio/db/biosql/sequence.rb, line 217
# Returns the name of the biodatabase associated with @entry.
def database
  biodatabase = @entry.biodatabase
  biodatabase.name
end
# File lib/bio/db/biosql/sequence.rb, line 221
# Returns the description of the biodatabase associated with @entry.
def database_desc
  biodatabase = @entry.biodatabase
  biodatabase.description
end
# File lib/bio/db/biosql/sequence.rb, line 242
# Returns the description stored on @entry.
def description
  wrapped = @entry
  wrapped.description
end
# File lib/bio/db/biosql/sequence.rb, line 247
# Assigns +value+ to the description of @entry. Nothing is saved here.
def description=(value)
  wrapped = @entry
  wrapped.description = value
end
# File lib/bio/db/biosql/sequence.rb, line 238
# Assigns +value+ to the division of @entry. Nothing is saved here.
def division=(value)
  wrapped = @entry
  wrapped.division = value
end
# File lib/bio/db/biosql/sequence.rb, line 273
# Persists one feature (+feat+) for @entry.
#
# Steps, in order:
# 1. find or create the type term (named after feat.feature) in the
#    'SeqFeature Keys' ontology and the source term in 'SeqFeature Sources';
# 2. create a Seqfeature ranked after the entry's existing seqfeatures;
# 3. save one Location per element of feat.locations;
# 4. save one SeqfeatureQualifierValue per qualifier yielded by feat.each,
#    using terms from the 'Annotation Tags' ontology.
def feature=(feat)
  # ToDo: avoid the Ontology lookups here; caching them (e.g. in class
  # variables) would probably be more efficient.
  key_ontology = Ontology.find_or_create_by_name('SeqFeature Keys')
  key_term = Term.find_or_create_by_name(:name=>feat.feature, :ontology=>key_ontology)
  source_ontology = Ontology.find_or_create_by_name('SeqFeature Sources')
  source_term = Term.find_or_create_by_name(:name=>'EMBLGenBankSwit', :ontology=>source_ontology)

  feature_rank = @entry.seqfeatures.count.succ
  stored_feature = Seqfeature.create(:bioentry=>@entry, :source_term=>source_term, :type_term=>key_term, :rank=>feature_rank, :display_name=>'')

  feat.locations.each do |loc|
    # The rank is re-read from the stored feature on every iteration.
    location_rank = stored_feature.locations.count.succ
    Location.new(:seqfeature=>stored_feature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand, :rank=>location_rank).save!
  end

  tag_ontology = Ontology.find_or_create_by_name('Annotation Tags')
  feat.each do |qualifier|
    tag_term = Term.find_or_create_by_name(:name=>qualifier.qualifier, :ontology=>tag_ontology)
    value_rank = stored_feature.seqfeature_qualifier_values.count.succ
    SeqfeatureQualifierValue.new(:seqfeature=>stored_feature, :term=>tag_term, :value=>qualifier.value.to_s, :rank=>value_rank).save!
  end
end
# File lib/bio/db/biosql/sequence.rb, line 268
# Returns one converted feature per seqfeature of @entry; each seqfeature is
# passed through get_seqfeature.
def features
  @entry.seqfeatures.map do |seqfeature|
    self.get_seqfeature(seqfeature)
  end
end
# File lib/bio/db/biosql/sequence.rb, line 76
# Converts a stored seqfeature (+sf+) into a Bio::Feature.
#
# The position string is the comma-joined to_s of every location, wrapped in
# "join(...)" when there is more than one location. Each qualifier value of
# +sf+ becomes a Bio::Feature::Qualifier built from its term name and value.
def get_seqfeature(sf)
  position = sf.locations.map(&:to_s).join(',')
  position = "join(#{position})" if sf.locations.count > 1

  type_name = sf.type_term.name
  qualifiers = sf.seqfeature_qualifier_values.map do |stored|
    Bio::Feature::Qualifier.new(stored.term.name, stored.value)
  end

  Bio::Feature.new(type_name, position, qualifiers)
end
# File lib/bio/db/biosql/sequence.rb, line 256
# Assigns +value+ to the identifier of @entry. Nothing is saved here.
def identifier=(value)
  wrapped = @entry
  wrapped.identifier = value
end
# File lib/bio/db/biosql/sequence.rb, line 334
# Returns the length recorded on the biosequence of @entry.
def length
  biosequence = @entry.biosequence
  biosequence.length
end
# File lib/bio/db/biosql/sequence.rb, line 85
# Assigns +len+ to the length of the biosequence of @entry. Nothing is saved
# here.
def length=(len)
  biosequence = @entry.biosequence
  biosequence.length = len
end
# File lib/bio/db/biosql/sequence.rb, line 184
# Assigns +value+ to the name of @entry. Nothing is saved here.
def name=(value)
  wrapped = @entry
  wrapped.name = value
end
TODO def secondary_accession
@entry.bioentry_qualifier_values
end
# File lib/bio/db/biosql/sequence.rb, line 201
# Returns the organism label of @entry.
#
# * "" when the entry has no taxon;
# * otherwise the scientific name, immediately followed by "(common name)"
#   (no separating space) when the taxon has a GenBank common name.
def organism
  taxon = @entry.taxon
  return "" if taxon.nil?

  scientific = "#{taxon.taxon_scientific_name.name}"
  common = taxon.taxon_genbank_common_name
  suffix = common ? "(#{common.name})" : ''
  scientific + suffix
end
# File lib/bio/db/biosql/sequence.rb, line 206
# Links @entry to the taxon whose scientific name matches +value+.
#
# A parenthesised suffix is removed from +value+ before the lookup. The
# whitespace before "(" is optional, so both "Name (common)" and
# "Name(common)" are reduced to "Name".
#
# When TaxonName has no 'scientific name' row for the stripped name, an error
# line is printed and @entry is left untouched; otherwise the taxon_id is
# assigned and @entry is saved with save!.
def organism=(value)
  scientific_name = value.gsub(/\s*\(.+\)/, '')
  taxon_name = TaxonName.find_by_name_and_name_class(scientific_name, 'scientific name')
  if taxon_name.nil?
    puts "Error value doesn't exists in taxon_name table with scientific name constraint."
  else
    @entry.taxon_id = taxon_name.taxon_id
    @entry.save!
  end
end
# File lib/bio/db/biosql/sequence.rb, line 189
# Returns the accession stored on @entry.
def primary_accession
  wrapped = @entry
  wrapped.accession
end
# File lib/bio/db/biosql/sequence.rb, line 193
# Assigns +value+ to the accession of @entry. Nothing is saved here.
def primary_accession=(value)
  wrapped = @entry
  wrapped.accession = value
end
# File lib/bio/db/biosql/sequence.rb, line 369
# Stores the reference +value+ and links it to @entry.
#
# The bibliographic fields of +value+ are flattened into one string of
# "key=value" pairs joined with "|", which goes into the Reference location
# column. A field is skipped when its "blank" check (empty? or nil?, as listed
# below) is true. Comments are joined with "~". The Reference is found or
# created by title; a BioentryReference linking it to @entry is then saved
# with save!, ranked by value.embl_gb_record_number.
def reference=(value)
  # [reader on value, predicate that means "nothing to store"]
  optional_fields = [
    [:journal, :empty?], [:volume, :empty?], [:issue, :empty?],
    [:pages, :empty?], [:year, :empty?], [:pubmed, :empty?],
    [:medline, :empty?], [:doi, :nil?], [:abstract, :empty?],
    [:url, :nil?], [:mesh, :empty?], [:affiliations, :empty?]
  ]

  locations = []
  optional_fields.each do |field, blank_check|
    content = value.send(field)
    locations << "#{field}=#{content}" unless content.send(blank_check)
  end
  locations << "comments=#{value.comments.join('~')}" unless value.comments.nil?

  # sequence_position looks like "from-to"; whitespace is dropped before the split.
  if value.sequence_position
    start_pos, end_pos = value.sequence_position.gsub(/\s*/,'').split('-')
  else
    start_pos, end_pos = nil, nil
  end

  stored = Reference.find_or_create_by_title(:title=>value.title, :authors=>value.authors.join(' '), :location=>locations.join('|'))

  link = BioentryReference.new(:bioentry=>@entry, :reference=>stored, :rank=>value.embl_gb_record_number, :start_pos=>start_pos, :end_pos=>end_pos)
  link.save!
end
# File lib/bio/db/biosql/sequence.rb, line 338
# Returns one Bio::Reference per bioentry reference of @entry.
#
# Each Bio::Reference is built from a hash with these keys:
# * 'authors' - the stored author string, split after every ". ";
# * 'sequence_position' - "start-end", only when both positions are set;
# * 'title' and 'embl_gb_record_number' (the link's rank);
# * one key per "key=value" pair found in the stored location string
#   (pairs are separated by "|", see reference=);
# * 'xrefs' - "dbname; accession." of the reference's dbxref, or ''.
def references
  # It would probably be better to have a dedicated class collecting this
  # information.
  @entry.bioentry_references.collect do |bio_ref|
    hash = Hash.new
    hash['authors'] = bio_ref.reference.authors.gsub(/\.\s/, "\.\s\|").split(/\|/)

    hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if (bio_ref.start_pos and bio_ref.end_pos)
    hash['title'] = bio_ref.reference.title
    hash['embl_gb_record_number'] = bio_ref.rank
    #TODO: solve the problem with specific comment per reference.
    #TODO: get dbxref

    # Split each pair on the FIRST "=" only, so values that themselves
    # contain "=" (for instance a URL with a query string) stay intact.
    bio_ref.reference.location.split('|').each do |element|
      key, value = element.split('=', 2)
      hash[key] = value
    end unless bio_ref.reference.location.nil?

    hash['xrefs'] = bio_ref.reference.dbxref ? "#{bio_ref.reference.dbxref.dbname}; #{bio_ref.reference.dbxref.accession}." : ''
    Bio::Reference.new(hash)
  end
end
# File lib/bio/db/biosql/sequence.rb, line 397
# Saves the biosequence of @entry first and then @entry itself, both with
# save!.
def save
  # Note from the original author: checks for SQL errors should be added.
  biosequence = @entry.biosequence
  biosequence.save!
  @entry.save!
end
Returns the sequence. Returns a Bio::Sequence::Generic object.
# File lib/bio/db/biosql/sequence.rb, line 301
# Returns the sequence as a Bio::Sequence::Generic built from the seq of the
# entry's biosequence, or from '' when the entry has no biosequence.
def seq
  biosequence = @entry.biosequence
  residues = biosequence ? biosequence.seq : ''
  Bio::Sequence::Generic.new(residues)
end
# File lib/bio/db/biosql/sequence.rb, line 306
# Sets the sequence of @entry to +value+.
#
# When the entry has no biosequence yet, a Biosequence is created from
# +value+, attached to the entry and saved with save!; otherwise the seq of
# the existing biosequence is overwritten. Afterwards the length is set to
# value.length and @entry is saved with save!.
def seq=(value)
  # TODO (from the original): check which alphabet the sequence uses.
  current = @entry.biosequence
  if current.nil?
    @entry.biosequence = Biosequence.new(:seq=>value)
    @entry.biosequence.save!
  else
    current.seq = value
  end

  self.length = value.length
  @entry.save!
end
# File lib/bio/db/biosql/sequence.rb, line 323
# Returns the scientific names of the entry's taxon and its ancestors,
# ordered from the outermost ancestor down to the entry's own taxon.
#
# The walk follows parent_taxon_id through Taxon.find and stops when:
# * there is no taxon (returns [] if the entry has none);
# * a taxon is its own parent (that taxon's name is NOT included);
# * a taxon has no parent_taxon_id (that taxon's name IS included). Without
#   this check the walk would call Taxon.find(nil).
def taxonomy
  tax = []
  taxon = @entry.taxon
  while taxon and taxon.taxon_id != taxon.parent_taxon_id
    tax << taxon.taxon_scientific_name.name
    parent_id = taxon.parent_taxon_id
    break if parent_id.nil?
    #Note: I don't like this call very much, correct with a relationship in the ref class.
    taxon = Taxon.find(parent_id)
  end
  tax.reverse
end
# File lib/bio/db/biosql/sequence.rb, line 415
# Returns the result of Bio::Sequence.adapter called with this object and the
# Bio::Sequence::Adapter::BioSQL adapter.
def to_biosequence
  Bio::Sequence.adapter(
    self,
    Bio::Sequence::Adapter::BioSQL
  )
end
# File lib/bio/db/biosql/sequence.rb, line 97
# Creates a new Bioentry from the sequence object +bs+ inside a single
# Bioentry.transaction and fills it through this object's own setters.
#
# bs::             source object; its readers (entry_id, primary_accession,
#                  definition, seq, features, references, comments, ...) are
#                  copied one by one, skipping values that are nil.
# biodatabase_id:: id stored on the new Bioentry.
#
# Returns self on success. When a StandardError is raised the message is
# printed and nil is returned. Other exceptions (for example Interrupt or
# SystemExit) are no longer swallowed and propagate to the caller.
#
# Progress markers are printed only when $DEBUG is set.
def to_biosql(bs,biodatabase_id)
  Bioentry.transaction do

    @entry = Bioentry.new(:biodatabase_id=>biodatabase_id, :name=>bs.entry_id)

    puts "primary" if $DEBUG
    self.primary_accession = bs.primary_accession

    puts "def" if $DEBUG
    self.definition = bs.definition unless bs.definition.nil?

    puts "seqver" if $DEBUG
    self.sequence_version = bs.sequence_version || 0

    puts "divi" if $DEBUG
    self.division = bs.division unless bs.division.nil?

    @entry.save!
    puts "secacc" if $DEBUG

    bs.secondary_accessions.each do |sa|
      # each secondary accession is written through its own setter call
      self.secondary_accessions = sa
    end unless bs.secondary_accessions.nil?

    # the entry has to exist before the sequence can be created
    puts "seq" if $DEBUG
    puts bs.seq if $DEBUG
    self.seq = bs.seq unless bs.seq.nil?
    puts "mol" if $DEBUG

    self.molecule_type = bs.molecule_type unless bs.molecule_type.nil?
    puts "dc" if $DEBUG

    self.data_class = bs.data_class unless bs.data_class.nil?
    puts "top" if $DEBUG
    self.topology = bs.topology unless bs.topology.nil?
    puts "datec" if $DEBUG
    self.date_created = bs.date_created unless bs.date_created.nil?
    puts "datemod" if $DEBUG
    self.date_modified = bs.date_modified unless bs.date_modified.nil?
    puts "key" if $DEBUG

    bs.keywords.each do |kw|
      # each keyword is written through its own setter call
      self.keywords = kw
    end unless bs.keywords.nil?
    # FIX: problem setting taxon_name: EMBL has "Arabidopsis thaliana (thale cress)"
    # but the taxon_name table does not contain this name. Check whether a
    # newer version of the table exists.
    puts "spec" if $DEBUG
    self.species = bs.species unless bs.species.nil?
    puts "Debug: #{bs.species}" if $DEBUG
    puts "Debug: feat..start" if $DEBUG

    bs.features.each do |feat|
      self.feature=feat
    end unless bs.features.nil?
    puts "Debug: feat...end" if $DEBUG

    bs.references.each do |reference|
      self.reference=reference
    end unless bs.references.nil?

    bs.comments.each do |comment|
      self.comment=comment
    end unless bs.comments.nil?

  end #transaction
  self
rescue StandardError => e
  # Only ordinary errors are reported here; process-level exceptions such as
  # Interrupt and SystemExit must reach the caller.
  puts "to_biosql exception: #{e}"
  puts $!
end
# File lib/bio/db/biosql/sequence.rb, line 402
# Returns the entry as a FASTA-formatted string: a ">accession" header line
# followed by the sequence wrapped at 60 characters per line.
#
# Earlier versions printed the two parts to stdout; returning a string lets
# the caller decide what to do with it.
def to_fasta
  header = ">" + accession + "\n"
  header + seq.gsub(/.{1,60}/, "\\0\n")
end
# File lib/bio/db/biosql/sequence.rb, line 409
# Returns a FASTA-formatted string like to_fasta, but for the reverse
# complement of the sequence (seq.reverse_complement), wrapped at 60
# characters per line.
def to_fasta_reverse_complememt
  header = ">" + accession + "\n"
  header + seq.reverse_complement.gsub(/.{1,60}/, "\\0\n")
end