| Class | Bio::EMBL |
| In: |
lib/bio/db/embl/embl.rb
|
| Parent: | EMBLDB |
| oc | -> | classification |
| taxonomy classification | ||
database references (DR). Returns an array of Bio::Sequence::DBLink objects.
# File lib/bio/db/embl/embl.rb, line 437
# Parses the database cross-reference (DR) lines of this entry.
#
# The raw DR field is split on newlines and every line is handed to
# Bio::Sequence::DBLink.parse_embl_DR_line. The parsed results are returned
# as an Array, in the same order as the lines appear in the entry.
def dblinks
  dr_lines = get('DR').split(/\n/)
  dr_lines.map do |dr_line|
    Bio::Sequence::DBLink.parse_embl_DR_line(dr_line)
  end
end
returns DIVISION in the ID line.
# File lib/bio/db/embl/embl.rb, line 140
# Returns the 'DIVISION' value of the parsed ID line (see id_line).
def division
  id_line('DIVISION')
end
returns contents in the date (DT) line.
where <DT Hash> is:
{}
keys: ‘created’ and ‘updated’
DT Line; date (2/entry)
# File lib/bio/db/embl/embl.rb, line 182
# Returns the contents of the date (DT) lines.
#
# On first use the raw DT field is split into lines; the first line becomes
# the 'created' value and the second the 'updated' value. From each line the
# first "two word characters + space" run (the "DT " line code) is removed and
# the remainder is stripped. The resulting Hash is cached in @data['DT'].
#
# An entry with fewer than two DT lines no longer raises NoMethodError: the
# value for a missing line is nil.
#
# key:: optional String, 'created' or 'updated'.
#
# Returns the whole Hash when +key+ is nil, otherwise the value stored under
# +key+ (nil when the key or the corresponding DT line is absent).
def dt(key = nil)
  unless @data['DT']
    # Only the first two DT lines are meaningful (created / last updated).
    created, updated = get('DT').to_s.split(/\n/).first(2).map do |line|
      line.sub(/\w{2} /, '').strip
    end
    @data['DT'] = { 'created' => created, 'updated' => updated }
  end

  key ? @data['DT'][key] : @data['DT']
end
returns ENTRY_NAME in the ID line.
# File lib/bio/db/embl/embl.rb, line 117
# Returns the 'ENTRY_NAME' value of the parsed ID line (see id_line).
def entry
  id_line('ENTRY_NAME')
end
returns contents in the feature table (FT) lines.
same as features method in bio/db/genbank.rb
FT Line; feature table data (>=0)
# File lib/bio/db/embl/embl.rb, line 262
# Returns the contents of the feature table (FT) lines.
#
# On first use the raw text in @orig['FT'] is scanned line by line:
#
# * lines starting with "FEATURES" are skipped;
# * a line matching "FT" + three spaces + a word starts a new feature,
#   recorded as [feature_key, body];
# * a body starting with " /" (while not inside an open quoted value) is
#   appended to the current feature as a new qualifier string;
# * any other body is a continuation and is appended to the last string of
#   the current feature.
#
# "body" is the text from offset 20 of the line (at most 60 characters) with
# the trailing newline removed. A line shorter than 20 characters has no body;
# it is treated as an empty continuation instead of raising NoMethodError.
#
# Each collected [key, position, qualifier, ...] array is then passed through
# parse_qualifiers, and the resulting Array is extended with
# Bio::Features::BackwardCompatibility and cached in @data['FT'].
#
# With a block, every feature is yielded in order. In both cases the cached
# feature Array is returned.
def ft
  unless @data['FT']
    features = []
    in_quote = false

    @orig['FT'].each_line do |line|
      next if line =~ /^FEATURES/

      # line[20, 60] is nil when the line has fewer than 20 characters.
      body = (line[20, 60] || '').chomp

      if line =~ /^FT {3}(\S+)/
        # New feature: [ feature, position, /q="data", ... ]
        features.push([Regexp.last_match(1), body])
      elsif body =~ /^ \// && !in_quote
        # New qualifier: /q="data..., /q=data, /q
        features.last.push(body)
        in_quote = true if body =~ /=" / && body !~ /"$/
      else
        # Continuation of the previous value: ...data..., ...data..."
        features.last.last << body
        in_quote = false if body =~ /"$/
      end
    end

    features.map! { |parts| parse_qualifiers(parts) }
    @data['FT'] = features.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FT'] unless block_given?

  @data['FT'].each { |feature| yield feature }
end
returns contents in the ID line.
where <ID Hash> is:
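{'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => String, 'TOPOLOGY' => String}
('SEQUENCE_VERSION' and 'TOPOLOGY' are set only for the Rel 89- ID line format, which carries an SV field.)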
ID Line
"ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
DATA_CLASS = [‘standard’]
MOLECULE_TYPE: DNA RNA XXX
Code ( DIVISION )
EST (ESTs) PHG (Bacteriophage) FUN (Fungi) GSS (Genome survey) HTC (High Throughput cDNAs) HTG (HTGs) HUM (Human) INV (Invertebrates) ORG (Organelles) MAM (Other Mammals) VRT (Other Vertebrates) PLN (Plants) PRO (Prokaryotes) ROD (Rodents) SYN (Synthetic) STS (STSs) UNC (Unclassified) VRL (Viruses)
Rel 89-:
ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
# File lib/bio/db/embl/embl.rb, line 89
# Returns the contents of the ID line.
#
# On first use the ID field (via fetch('ID')) is split on "; " and parsed into
# a Hash that is cached in @data['ID']:
#
# * the first element is split on spaces into 'ENTRY_NAME' and 'DATA_CLASS';
# * if the next element starts with "SV", the following elements are read as
#   'SEQUENCE_VERSION' (last word of the SV element), 'TOPOLOGY',
#   'MOLECULE_TYPE' and 'DATA_CLASS'; otherwise the next element is
#   'MOLECULE_TYPE';
# * then come 'DIVISION' and 'SEQUENCE_LENGTH' (the leading number of the
#   last element, converted with to_i).
#
# key:: optional String naming one of the Hash keys.
#
# Returns the whole Hash when +key+ is nil, otherwise the value for +key+.
def id_line(key = nil)
  @data['ID'] ||= begin
    fields = fetch('ID').split(/; +/)
    entry_name, data_class = fields.shift.split(/ +/)
    parsed = { 'ENTRY_NAME' => entry_name, 'DATA_CLASS' => data_class }

    if fields.first =~ /^SV/
      parsed['SEQUENCE_VERSION'] = fields.shift.split(' ').last
      parsed['TOPOLOGY']         = fields.shift
      parsed['MOLECULE_TYPE']    = fields.shift
      parsed['DATA_CLASS']       = fields.shift
    else
      parsed['MOLECULE_TYPE'] = fields.shift
    end

    parsed['DIVISION'] = fields.shift
    parsed['SEQUENCE_LENGTH'] = fields.shift.strip.split(' ').first.to_i
    parsed
  end

  key ? @data['ID'][key] : @data['ID']
end
returns MOLECULE_TYPE in the ID line.
# File lib/bio/db/embl/embl.rb, line 125
# Returns the 'MOLECULE_TYPE' value of the parsed ID line (see id_line).
def molecule
  id_line('MOLECULE_TYPE')
end
release number when created
# File lib/bio/db/embl/embl.rb, line 397
# Release information for when the entry was created: the first element of
# parse_release_version applied to the 'created' DT value.
def release_created
  created_line = dt['created']
  parse_release_version(created_line)[0]
end
release number when last updated
# File lib/bio/db/embl/embl.rb, line 392
# Release information for when the entry was last updated: the first element
# of parse_release_version applied to the 'updated' DT value.
def release_modified
  updated_line = dt['updated']
  parse_release_version(updated_line)[0]
end
returns the nucleotide sequence in this entry.
@orig[''] as sequence.
bb Line; (blanks) sequence data (>=1)
# File lib/bio/db/embl/embl.rb, line 371
# Returns the nucleotide sequence of this entry as a Bio::Sequence::NA.
#
# The sequence data field (fetched under the empty tag '') has all spaces and
# all runs of digits removed before being wrapped.
def seq
  residues = fetch('').delete(' ').gsub(/\d+/, '')
  Bio::Sequence::NA.new(residues)
end
returns sequence header information in the sequence header (SQ) line.
where <SQ Hash> is:
{'ntlen' => Int, 'other' => Int,
'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
SQ Line; sequence header (1/entry)
SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
# File lib/bio/db/embl/embl.rb, line 348
# Returns the sequence header information from the SQ line.
#
# On first use the SQ field is matched against
# "<n> BP; <n> A; <n> C; <n> G; <n> T; <n> other;" and the six numbers are
# cached in @data['SQ'] as a Hash with the String keys 'ntlen', 'other', 'a',
# 'c', 'g' and 't'. If the field does not match, every count is 0.
#
# base:: optional key ('ntlen', 'other', 'a', 'c', 'g', 't'); matched
#        case-insensitively and may be given as a String or a Symbol.
#
# Returns the whole Hash when +base+ is nil, otherwise the Integer stored
# under +base+ (nil for an unknown key).
def sq(base = nil)
  unless @data['SQ']
    header = /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/.match(fetch('SQ'))
    # A non-matching SQ line yields zero for every count.
    ntlen, a, c, g, t, other = header ? header.captures.map(&:to_i) : Array.new(6, 0)
    @data['SQ'] = { 'ntlen' => ntlen, 'other' => other,
                    'a' => a, 'c' => c, 'g' => g, 't' => t }
  end

  # to_s lets Symbol keys work; Symbol#downcase alone would never match the
  # String keys of the cached Hash.
  base ? @data['SQ'][base.to_s.downcase] : @data['SQ']
end
returns the version information in the sequence version (SV) line.
SV Line; sequence version (1/entry)
SV Accession.Version
# File lib/bio/db/embl/embl.rb, line 162
# Returns the sequence version string.
#
# The SV field is used with its first ';' removed. When that leaves an empty
# string, the value is built from the ID line instead, as
# "<ENTRY_NAME>.<SEQUENCE_VERSION>".
def sv
  version = field_fetch('SV').sub(/;/, '')
  return version unless version == ""

  [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.')
end
converts the entry to Bio::Sequence object
Arguments::
| Returns: | Bio::Sequence object |
# File lib/bio/db/embl/embl.rb, line 459
# Converts this entry to a Bio::Sequence object by passing the entry itself
# and the Bio::Sequence::Adapter::EMBL adapter to Bio::Sequence.adapter.
def to_biosequence
  embl_adapter = Bio::Sequence::Adapter::EMBL
  Bio::Sequence.adapter(self, embl_adapter)
end