| Module | Bio::NCBIDB::Common |
| In: |
lib/bio/db/genbank/common.rb
|
This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.
| DELIMITER | = | RS = "\n//\n" |
| TAGSIZE | = | 12 |
# File lib/bio/db/genbank/common.rb, line 30
# Creates the entry object by delegating to the parent class's initializer,
# forwarding the entry together with this module's TAGSIZE constant.
#
# entry:: passed through to the parent initializer unchanged
def initialize(entry)
  super(entry, TAGSIZE)
end
Returns the ACCESSION part of the acc_version.
# File lib/bio/db/genbank/common.rb, line 62
# Returns the ACCESSION part of acc_version: the text in front of the
# first period.  The result is always a String; it is empty when
# acc_version yields no leading piece.
def accession
  pieces = acc_version.split(/\./)
  pieces.first.to_s
end
ACCESSION — Returns contents of the ACCESSION record as an Array.
# File lib/bio/db/genbank/common.rb, line 46
# ACCESSION -- Returns the contents of the ACCESSION field as an Array,
# obtained by stripping the field text and splitting it on runs of
# whitespace.
def accessions
  raw = field_fetch('ACCESSION')
  raw.strip.split(/\s+/)
end
# File lib/bio/db/genbank/common.rb, line 120
# Returns the 'common_name' entry of the Hash produced by #source.
def common_name
  info = source
  info['common_name']
end
FEATURES — Returns contents of the FEATURES record as an array of Bio::Feature objects.
# File lib/bio/db/genbank/common.rb, line 207
# FEATURES -- Returns contents of the FEATURES record as an array of
# Bio::Feature objects.
#
# The feature table text is split into one sub-array per feature,
# [ feature type, position, /q="data", ... ], and each sub-array is then
# converted by parse_qualifiers.  The resulting array is extended with
# Bio::Features::BackwardCompatibility and cached in @data['FEATURES'],
# so the table is parsed only once.
#
# When a block is given, each element is yielded in order; otherwise the
# cached array itself is returned.
#
# Qualifier or continuation lines that appear before any feature key line
# have nothing to be attached to and are skipped (previously they caused a
# NoMethodError on nil).
def features
  unless @data['FEATURES']
    table = []
    in_quote = false

    get('FEATURES').each_line do |line|
      # the "FEATURES" header line carries no feature data
      next if line =~ /^FEATURES/

      # feature type (source, CDS, ...): the first 20 columns
      head = line[0, 20].to_s.strip

      # feature value (position or /qualifier=): up to 60 columns starting
      # at column 21, so the value keeps that column's leading blank
      body = line[20, 60].to_s.chomp

      if line =~ /^ {5}\S/
        # a feature key line opens a new sub-array
        table.push([head, body])

      elsif table.empty?
        # orphan qualifier/continuation line: no feature to attach it to
        next

      elsif body =~ /^ \// && !in_quote # gb:IRO125195
        # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
        table.last.push(body)

        # remember an open quote (/q="data...) so that following lines,
        # even ones starting with '/', are treated as continuations
        in_quote = true if body =~ /="/ && body !~ /"$/

      else
        # feature qualifier continued (...data..., ...data...")
        table.last.last << body

        # closing quote (/q="data... lines ...")
        in_quote = false if body =~ /"$/
      end
    end

    table.map! { |subary| parse_qualifiers(subary) }

    @data['FEATURES'] = table.extend(Bio::Features::BackwardCompatibility)
  end

  if block_given?
    @data['FEATURES'].each { |f| yield f }
  else
    @data['FEATURES']
  end
end
KEYWORDS — Returns contents of the KEYWORDS record as an Array of Strings.
# File lib/bio/db/genbank/common.rb, line 84
# KEYWORDS -- Returns contents of the KEYWORDS record as an Array of
# Strings.
#
# A trailing '.' is removed from the fetched text, which is then split on
# "; ".  The Array is cached in @data['KEYWORDS'].
def keywords
  cached = @data['KEYWORDS']
  return cached if cached

  @data['KEYWORDS'] = fetch('KEYWORDS').chomp('.').split(/; /)
end
LOCUS — Locus class must be defined in child classes.
# File lib/bio/db/genbank/common.rb, line 35
# LOCUS -- Placeholder with an empty body (it returns nil); the Locus
# handling must be supplied by each child class.
def locus
  # must be overridden in each subclass
end
ORIGIN — Returns contents of the ORIGIN record as a String.
# File lib/bio/db/genbank/common.rb, line 261
# ORIGIN -- Returns contents of the ORIGIN record as a String.
#
# On the first call the ORIGIN text is split at its first newline: the
# first line (tag removed, then truncated) is cached in @data['ORIGIN'],
# and the remainder, with digits, blanks, tabs, line breaks and slashes
# removed, is cached in @data['SEQUENCE'] as a side effect.
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, sequence_text = get('ORIGIN').split("\n", 2)
  sequence_text ||= ""
  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = sequence_text.delete("0-9 \t\n\r\/")
  @data['ORIGIN']
end
REFERENCE — Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
# File lib/bio/db/genbank/common.rb, line 136
# REFERENCE -- Returns contents of the REFERENCE records as an Array of
# Bio::Reference objects.
#
# Each top-level REFERENCE block is broken into its sub-fields, which are
# collected into a Hash handed to Reference.new:
#
# REFERENCE:: 'embl_gb_record_number' and, unless the parenthesised part
#             is missing or equals 'sites', 'sequence_position'
# AUTHORS::   'authors' (an Array of names)
# TITLE::     'title' (with a '.' appended)
# JOURNAL::   'journal', plus 'volume', 'issue', 'pages' and 'year' when
#             the text has the "name vol (issue), from-to (year)" shape
# MEDLINE::   'medline'
# PUBMED::    'pubmed'
# REMARK::    appended to 'comments'
#
# The Array is extended with Bio::References::BackwardCompatibility and
# cached in @data['REFERENCE'].  When a block is given, each reference is
# yielded in order; otherwise the cached Array is returned.
def references
  unless @data['REFERENCE']
    collected = []

    toptag2array(get('REFERENCE')).each do |ref|
      fields = {}

      subtag2array(ref).each do |field|
        case tag_get(field)
        when /REFERENCE/
          number = /(\d+)(\s*\((.+)\))?/m.match(tag_cut(field))
          if number
            fields['embl_gb_record_number'] = number[1].to_i
            span = number[3]
            if span && span != 'sites'
              # "bases 1 to 10; 20 to 30" becomes "1-10, 20-30"
              fields['sequence_position'] =
                span.sub(/\A\s*bases\s+/, '')
                    .gsub(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
                    .gsub(/\s*\;\s*/, ', ')
            end
          end
        when /AUTHORS/
          names = truncate(tag_cut(field)).split(/, /)
          # the final item may hold two names joined by "and"
          names[-1] = names[-1].split(/\s+and\s+/) if names[-1]
          fields['authors'] = names.flatten.map { |a| a.sub(/,/, ', ') }
        when /TITLE/
          fields['title'] = truncate(tag_cut(field)) + '.'
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          citation = /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/.match(journal)
          if citation
            fields['journal'] = citation[1]
            fields['volume']  = citation[2]
            fields['issue']   = citation[3]
            fields['pages']   = citation[4]
            fields['year']    = citation[5]
          else
            fields['journal'] = journal
          end
        when /MEDLINE/
          fields['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          fields['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          fields['comments'] ||= []
          fields['comments'].push truncate(tag_cut(field))
        end
      end

      collected.push(Reference.new(fields))
    end

    @data['REFERENCE'] = collected.extend(Bio::References::BackwardCompatibility)
  end

  if block_given?
    @data['REFERENCE'].each { |r| yield r }
  else
    @data['REFERENCE']
  end
end
SOURCE — Returns contents of the SOURCE record as a Hash.
# File lib/bio/db/genbank/common.rb, line 96
# SOURCE -- Returns contents of the SOURCE record as a Hash.
#
# The Hash has the keys 'common_name', 'organism' and 'taxonomy' and
# answers '' for any other key.  It is built once and cached in
# @data['SOURCE'].
def source
  return @data['SOURCE'] if @data['SOURCE']

  name, org = get('SOURCE').split('ORGANISM')
  org ||= ""

  # The taxonomy begins at the first whitespace-free token ending in ';'
  # or, when there is none, at the first one ending in '.' (rs:NC_001741).
  # Everything in front of that token is the organism.
  boundary = org.match(/\S+;/) || org.match(/\S+\./)
  if boundary
    organism = boundary.pre_match
    taxonomy = boundary[0] + boundary.post_match
  else
    organism = org
    taxonomy = ''
  end

  info = {
    'common_name' => truncate(tag_cut(name)),
    'organism'    => truncate(organism),
    'taxonomy'    => truncate(taxonomy),
  }
  info.default = ''
  @data['SOURCE'] = info
end
Returns the VERSION part of the acc_version as an Integer.
# File lib/bio/db/genbank/common.rb, line 67
# Returns the VERSION part of acc_version: the text after the last
# period, converted with to_i (so a missing or non-numeric piece gives 0).
def version
  pieces = acc_version.split(/\./)
  pieces.last.to_i
end