| Module | Bio::EMBLDB::Common |
| In: |
lib/bio/db/embl/common.rb
|
| DELIMITER | = | "\n//\n" |
| RS | = | DELIMITER |
| TAGSIZE | = | 5 |
# File lib/bio/db/embl/common.rb, line 86
# Constructs the entry by delegating to the superclass constructor.
#
# entry:: value forwarded unchanged to +super+ (presumably the raw entry
#         text -- confirm against the superclass).
#
# The second argument handed to +super+ is this module's TAGSIZE constant.
def initialize(entry)
  tag_width = TAGSIZE
  super(entry, tag_width)
end
Returns an Array of accession numbers in the AC lines.
AC Line
"AC A12345; B23456;" AC [AC1;]+
Accession numbers format:
Position: 1        2      3           4           5           6
Allowed:  [O,P,Q]  [0-9]  [A-Z, 0-9]  [A-Z, 0-9]  [A-Z, 0-9]  [0-9]
# File lib/bio/db/embl/common.rb, line 99
# Returns the accession numbers found in the AC field.
#
# The field text (e.g. "A12345; B23456;") is obtained from
# field_fetch('AC'), split on runs of whitespace, and the first ';' is
# removed from each token. Splitting on whitespace runs (rather than on a
# single space character) means leading or repeated blanks never produce
# empty-string entries in the result.
#
# The parsed list is cached in @data['AC'] and reused on later calls.
#
# Returns an Array of Strings (empty when the field text is empty).
def ac
  @data['AC'] ||= field_fetch('AC').split(' ').map { |token| token.sub(/;/, '') }
end
returns contents in the DR line.
where <Database cross-reference Hash> is:
DR Line; database cross-reference (>=0), one cross-reference per line
"DR database_identifier; primary_identifier; secondary_identifier."
# File lib/bio/db/embl/common.rb, line 329
# Returns the database cross-references parsed from the DR lines.
#
# The text from get('DR') is processed line by line: the leading "DR "
# and the final character of the line (the closing '.' in the documented
# format) are removed, and the remainder is split on "; ". The first
# field is used as the key; the remaining fields are appended, as one
# Array, to the list stored under that key.
#
# The resulting Hash (key => Array of Arrays of Strings) is cached in
# @data['DR'].
#
# With a block, each key and its list are yielded in turn. The cached
# Hash is the return value in both cases.
def dr
  @data['DR'] ||= get('DR').split(/\n/).each_with_object({}) { |line, xrefs|
    dbname, *identifiers = line.sub(/^DR /,'').sub(/.$/,'').strip.split(/;[ ]/)
    (xrefs[dbname] ||= []).push(identifiers)
  }
  return @data['DR'] unless block_given?

  @data['DR'].each { |dbname, entries| yield(dbname, entries) }
end
returns keywords in the KW line.
KW Line; keyword (>=1)
KW [Keyword;]+
# File lib/bio/db/embl/common.rb, line 220
# Returns the keywords listed in the KW line(s).
#
# When get('KW') is non-empty, the text from fetch('KW') has its final
# character removed (the closing '.' in the documented format), is split
# on ';', and each piece is stripped of surrounding whitespace. When
# get('KW') is empty the result is an empty Array.
#
# The list is cached in @data['KW'].
#
# Returns an Array of Strings.
def kw
  return @data['KW'] if @data['KW']

  @data['KW'] =
    if get('KW').size > 0
      fetch('KW').sub(/.$/,'').split(/;/).map { |keyword| keyword.strip }
    else
      []
    end
end
returns contents in the OC line.
OC Line; organism classification (>=1)
OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae; OC Theileria.
# File lib/bio/db/embl/common.rb, line 203
# Returns the organism classification from the OC line(s).
#
# The text from fetch('OC') has its final character removed (the closing
# '.' in the documented format), is split on ';', and each piece is
# stripped of surrounding whitespace. The list is cached in @data['OC'].
#
# A NameError (which includes NoMethodError) raised while parsing is
# swallowed; in that case nothing is cached and nil is returned.
#
# Returns an Array of Strings, or nil when parsing failed.
def oc
  return @data['OC'] if @data['OC']

  begin
    pieces = fetch('OC').sub(/.$/,'').split(/;/)
    @data['OC'] = pieces.map { |taxon| taxon.strip }
  rescue NameError
    nil
  end
  @data['OC']
end
returns contents in the OG line.
OG Line; organelle (0 or 1/entry)
OG Plastid; Chloroplast. OG Mitochondrion. OG Plasmid sym pNGR234a. OG Plastid; Cyanelle. OG Plasmid pSymA (megaplasmid 1). OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.
# File lib/bio/db/embl/common.rb, line 180
# Returns the organelle names from the OG line.
#
# When get('OG') is non-empty, the text from fetch('OG') is normalised
# and split on ',':
# * a trailing '.' is removed;
# * the first standalone " and" is removed (the word boundary keeps
#   words that merely start with "and", e.g. "andersonii", intact);
# * every ';' is turned into ',' so that each ';'-separated item becomes
#   its own element.
# Each element is stripped of surrounding whitespace.
#
# Only non-mutating String methods are used, so the object returned by
# fetch is never modified.
#
# The list is cached in @data['OG'].
#
# Returns an Array of Strings (empty when there is no OG line).
def og
  return @data['OG'] if @data['OG']

  organelles = []
  if get('OG').size > 0
    normalised = fetch('OG').sub(/\.$/, '').sub(/ and\b/, '').gsub(';', ',')
    organelles = normalised.split(',').map { |name| name.strip }
  end
  @data['OG'] = organelles
end
returns contents in the OS line.
where <OS Hash> is:
[{'name'=>'Human', 'os'=>'Homo sapiens'},
{'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
OS Line; organism species (>=1)
"OS Trifolium repens (white clover)" OS Genus species (name). OS Genus species (name0) (name1). OS Genus species (name0) (name1). OS Genus species (name0), G s0 (name0), and G s (name1).
# File lib/bio/db/embl/common.rb, line 148
# Returns the organism species parsed from the OS line(s).
#
# The text from fetch('OS') is split on ", and" / ", ". For each piece a
# Hash is built with:
# 'os'::   the species text matched by the species pattern;
# 'name':: the parenthesised text of the piece, parentheses included
#          (e.g. "(white clover)"), or nil when there is none.
# The Array of these Hashes is cached in @data['OS'].
#
# num:: optional index into that Array. When given, the selected entry is
#       returned as a single String, e.g. "Trifolium repens (white clover)"
#       (just the species when the entry has no name). Previously this
#       string was built but discarded, and its interpolation was
#       malformed, so the whole Array was returned regardless of +num+.
#
# Returns the Array of Hashes when +num+ is nil, otherwise a String.
#
# Raises RuntimeError when a piece does not match the species pattern.
def os(num = nil)
  unless @data['OS']
    organisms = fetch('OS').split(/, and|, /).map do |piece|
      species = piece[/([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/, 1]
      raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n" unless species

      { 'name' => piece[/(\(.+\))/, 1], 'os' => species }
    end
    @data['OS'] = organisms
  end
  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  entry = @data['OS'][num]
  [entry['os'], entry['name']].compact.join(' ')
end
returns contents in the R lines.
where <reference information Hash> is:
{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
R Lines
# File lib/bio/db/embl/common.rb, line 242
# Returns the reference blocks parsed from the R* lines.
#
# The text from get('R') is split into one chunk per reference at each
# "\nRN " boundary; chunks that no longer start with "RN " get that
# prefix restored. Every line of a chunk must look like "<tag> <text>"
# with tag one of RN, RP, RX, RA, RL, RC, RT, RG; the text of lines
# sharing a tag is concatenated, separated by a space.
#
# Each collected value is then stripped of surrounding whitespace and has
# one leading '"', one trailing ';' and one trailing '"' removed, in that
# order.
#
# The Array is cached in @data['R'].
#
# Returns an Array of Hashes, each with the keys
# 'RN', 'RC', 'RP', 'RX', 'RA', 'RT', 'RL' and 'RG' (String values).
#
# Raises RuntimeError when a line does not match the expected format.
def ref
  return @data['R'] if @data['R']

  records = get('R').split(/\nRN /).map do |chunk|
    fields = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
              'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
    chunk = 'RN ' + chunk unless chunk =~ /^RN /
    chunk.split("\n").each do |line|
      matched = /^(R[NPXARLCTG]) (.+)/.match(line)
      raise "Invalid format in R lines, \n[#{line}]\n" unless matched

      fields[matched[1]] += matched[2] + ' '
    end
    fields.each_value do |text|
      text.strip!
      text.sub!(/^"/,'')
      text.sub!(/;$/,'')
      text.sub!(/"$/,'')
    end
    fields
  end
  @data['R'] = records
end
returns Bio::Reference object from Bio::EMBLDB::Common#ref.
# File lib/bio/db/embl/common.rb, line 271
# Returns the entries of #ref converted to Reference objects.
#
# For each Hash returned by #ref an attribute Hash is built:
# RN:: the digits inside "[n]" become 'embl_gb_record_number' (Integer).
# RC:: a non-blank value is appended to the 'comments' Array.
# RP:: stored as 'sequence_position'.
# RA:: split on ", "; in each author a ',' is inserted before the last
#      space-separated word ("Oxtoby E." => "Oxtoby, E."); stored as
#      'authors'.
# RT:: stored as 'title'.
# RL:: when it matches "<journal> <vol>(<issue>):<pages>(<year>)" it is
#      broken up into 'journal', 'volume', 'issue', 'pages' and 'year';
#      otherwise the whole value is stored as 'journal'.
# RX:: each "TAG; id." item is stored under the downcased tag.
# Other keys are ignored.
#
# Each attribute Hash is passed to Reference.new. The resulting Array is
# extended with Bio::References::BackwardCompatibility and cached in
# @data['references'].
def references
  return @data['references'] if @data['references']

  list = ref.map do |record|
    attrs = {}
    record.each do |tag, text|
      case tag
      when 'RN'
        number = text.to_s[/\[(\d+)\]/, 1]
        attrs['embl_gb_record_number'] = number.to_i if number
      when 'RC'
        next if text.to_s.strip.empty?

        (attrs['comments'] ||= []).push text
      when 'RP'
        attrs['sequence_position'] = text
      when 'RA'
        attrs['authors'] = text.split(/\, /).map { |author|
          author.sub(/( [^ ]+)\z/, ",\\1")
        }
      when 'RT'
        attrs['title'] = text
      when 'RL'
        parts = /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/.match(text.to_s)
        if parts
          attrs['journal'] = parts[1].rstrip
          attrs['volume']  = parts[2]
          attrs['issue']   = parts[4]
          attrs['pages']   = parts[6]
          attrs['year']    = parts[7]
        else
          attrs['journal'] = text
        end
      when 'RX' # PUBMED, DOI, (AGRICOLA)
        text.split(/\. /).each do |item|
          db, id = item.split(/\; /).map { |part| part.strip.sub(/\.\z/, '') }
          attrs[ db.downcase ] = id
        end
      end
    end
    Reference.new(attrs)
  end
  @data['references'] = list.extend(Bio::References::BackwardCompatibility)
end