| Class | Bio::PSORT::PSORT2::Report |
| In: |
lib/bio/appl/psort/report.rb
|
| Parent: | Object |
| definition | [RW] | Definition of query sequence. |
| entry_id | [RW] | entry_id of query sequence. |
| features | [RW] | Feature vector used for the kNN prediction. |
| k | [RW] | k parameter of k-nearest neighbors classifier. |
| pred | [RW] | Predicted subcellular localization (three-letter code). |
| prob | [RW] | Probability vector of kNN prediction. |
| raw | [RW] | Raw text of output report. |
| scl | [RW] | Given subcellular localization (three-letter code). |
| seq | [RW] | Sequence of query sequence. |
Parser for the default report format ("psort report" output).
# File lib/bio/appl/psort/report.rb, line 273
# Parser for the default report format ("psort report" output).
#
# The report text is split on blank lines into four sections, used in
# this order: header line, feature matrix, kNN probability list and
# prediction line.
#
# ent      :: raw report text (String)
# entry_id :: optional entry id handed to the new report object
#
# Returns the populated report object.
# Raises ArgumentError when the text has fewer than four sections.
def self.default_parser(ent, entry_id = nil)
  report = self.new(ent, entry_id)
  sections = ent.split(/\n\n/).map { |section| section.chomp }
  if sections.size < 4
    raise ArgumentError, "invalid format\n[#{ent}]"
  end

  report.set_header_line(sections[0])

  # Feature matrix: a run of "key: value" pairs spread over several lines.
  # Scanning for the pairs directly is independent of how many blanks
  # separate them and tolerates a blank between the key and the colon.
  sections[1].scan(/(\S+)\s*:\s*(\S+)/) do |key, value|
    report.features[key] = value.to_f
  end

  report.prob = self.set_kNN_prob(sections[2])
  report.set_prediction(sections[3])

  report
end
Divides entry body
# File lib/bio/appl/psort/report.rb, line 392
# Divides an entry body at the BOUNDARY marker.
#
# entry :: object responding to #index and #[] (v_parser passes an Array
#          of report sections)
#
# Returns two values: everything before the boundary, and everything
# starting two positions after it (the boundary itself and the element
# that follows it are dropped).
# Raises ArgumentError when the entry does not contain BOUNDARY.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  if boundary.nil?
    raise ArgumentError, "boundary not found in entry"
  end

  # Start/length form: a boundary at position 0 yields an empty head
  # (a 0..-1 range would select the whole entry instead).
  head = entry[0, boundary]
  tail = entry[(boundary + 2)..-1]
  return head, tail
end
Constructs a Bio::PSORT::PSORT2::Report object.
# File lib/bio/appl/psort/report.rb, line 227
# Builds a report object; every argument is stored in the instance
# variable of the same name.
#
# raw        :: raw text of the output report
# entry_id   :: entry id of the query sequence
# scl        :: given subcellular localization
# definition :: definition of the query sequence
# seq        :: query sequence
# k          :: k parameter of the kNN classifier
# features   :: feature Hash
# prob       :: probability Hash
# pred       :: predicted subcellular localization
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  # query description
  @raw        = raw
  @entry_id   = entry_id
  @definition = definition
  @seq        = seq
  @scl        = scl

  # kNN inputs and results
  @k        = k
  @features = features
  @prob     = prob
  @pred     = pred
end
Parses output report with output format detection automatically.
# File lib/bio/appl/psort/report.rb, line 242
# Parses an output report, detecting the output format automatically.
#
# str      :: raw report text
# entry_id :: entry id handed to the selected parser
#
# Dispatches on the first pattern found in +str+ and returns that
# branch's result: the default, verbose or "too short length" parser, or
# a bare report built from the raw text when only the WWW server banner
# is recognised.
# Raises ArgumentError when no known format marker is found.
def self.parser(str, entry_id)
  case str
  when /^ psg:/               # default report
    self.default_parser(str, entry_id)
  when /^PSG:/                # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # Only the server banner was recognised: keep the raw text in an
    # otherwise empty report. The text is +str+; no other local exists.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end
Parses the kNN probability section and returns the value for @prob (a Hash).
# File lib/bio/appl/psort/report.rb, line 309
# Builds the kNN probability Hash from the probability section of a
# report.
#
# str :: probability section; after tabs are removed, each line is split
#        on " %: " into a value and a localization name
#
# Returns a Hash with one entry per key of SclNames (0.0 by default);
# each parsed value is stored under the SclNames key whose value equals
# the name on that line.
def self.set_kNN_prob(str)
  scl_names = Bio::PSORT::PSORT2::SclNames

  prob = {}
  scl_names.keys.each { |code| prob[code] = 0.0 }

  str.gsub(/\t/, '').split(/\n/).each do |line|
    val, scl = line.strip.split(/ %: /)
    # Hash#key is the reverse lookup; Hash#index was removed in Ruby 3.0.
    prob[scl_names.key(scl)] = val.to_f
  end
  prob
end
Parser for the "too short length" report.
$id: too short length ($leng), skipped\n";
# File lib/bio/appl/psort/report.rb, line 260
# Parser for the "too short length" report.
#
# ent      :: raw report text
# entry_id :: optional entry id; when absent, the text in front of
#             ": too short length" is used
#
# Returns a report whose scl is '---' when the message is recognised;
# otherwise only raw and entry_id are set.
def self.too_short_parser(ent, entry_id = nil)
  report = self.new(ent)
  report.entry_id = entry_id
  return report unless ent =~ /^(.+)?: too short length/

  id_from_text = Regexp.last_match(1)
  report.entry_id = id_from_text unless report.entry_id
  report.scl = '---'
  report
end
Parser for the verbose output report format ("psort -v report" and WWW server output).
# File lib/bio/appl/psort/report.rb, line 338
# Parser for the verbose report format ("psort -v report" and WWW server
# output).
#
# ent      :: raw report text
# entry_id :: optional entry id handed to the new report
#
# Returns the populated Bio::PSORT::PSORT2::Report.
def self.v_parser(ent, entry_id = nil)
  report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)

  chunks = ent.split(/\n\n/).map { |chunk| chunk.chomp }

  # Re-attach chunks that do not open a section of their own to an
  # earlier chunk: those with no line starting with a word character,
  # '-', '>' or a tab, and those containing a line starting with "none"
  # (psort output bug). The distance back is supplied by search_j.
  chunks.each_with_index do |chunk, idx|
    opens_section = /^(\w|-|\>|\t)/ =~ chunk
    next if opens_section && !(/^none/ =~ chunk)

    back = self.__send__(:search_j, idx, chunks)
    chunks[idx - back] += chunk
    chunks[idx] = nil
  end
  chunks.compact!

  if /^ PSORT II server/ =~ chunks[0]   # for WWW version
    chunks.shift
    # Drop the last "Results of Subprograms" chunk and the chunk that
    # then sits just before its former position.
    marker = chunks.select { |chunk| /^Results of Subprograms/ =~ chunk }.last || ''
    position = chunks.index(marker)
    chunks.delete(marker)
    chunks.delete_at(position - 1)
  end

  report.set_header_line(chunks.shift)
  report.seq = Bio::Sequence::AA.new(chunks.shift)

  feature_part, prediction_part = self.divent(chunks)
  report.set_features(feature_part)
  report.prob = self.set_kNN_prob(prediction_part[0].strip)
  report.set_prediction(prediction_part[1].strip)

  report
end
Sets @features values.
# File lib/bio/appl/psort/report.rb, line 398
# Stores each feature chunk in #features, keyed by the text in front of
# its first ": " or ":\n", then records the sequence length under 'AA'.
#
# features_ary :: Array of feature chunk Strings
#
# Returns the sequence length.
def set_features(features_ary)
  features_ary.each do |chunk|
    label = chunk.split(/\:( |\n)/).first.strip
    self.features[label] = chunk   # unless /^\>/ =~ label
  end
  self.features['AA'] = self.seq.length
end
Sets the @entry_id, @definition and @scl values from the header line.
# File lib/bio/appl/psort/report.rb, line 292
# Parses the header line and sets @entry_id, @definition and @scl.
#
# str :: header line, optionally preceded by a line of dashes; the
#        argument itself is left unmodified, so frozen strings work
#
# The first blank/tab separated token is the entry id. It is always
# removed from the definition, and is stored in @entry_id only when no
# entry id is set yet. When the rest contains "(N aa) text", the text is
# the definition; otherwise the whole rest is. @scl is set when the
# first word of the definition is a key of SclNames.
#
# Returns the value assigned to @scl, or nil when @scl is not assigned.
def set_header_line(str)
  header = str.sub(/^-+\n/, '')
  tokens = header.split(/\t| /)

  # Consume the id token unconditionally so that a preset @entry_id does
  # not leave it at the front of the definition.
  id_token = tokens.shift.to_s.sub(/^-+/, '').strip
  @entry_id = id_token unless @entry_id || id_token.empty?

  rest = tokens.join(' ').chomp
  if rest =~ /\(\d+ aa\) (.+)$/
    @definition = Regexp.last_match(1)
  else
    @definition = rest
  end

  scl = @definition.split(' ')[0]
  @scl = scl if SclNames.key?(scl)
end
Sets the @pred and @k values (and @entry_id when it is not already set).
# File lib/bio/appl/psort/report.rb, line 323
# Parses the prediction line and stores its parts.
#
# str :: text containing "prediction for <id> is <scl> (k=<n>)"
#
# Sets @pred to the three-character code and @k to the captured digits
# (kept as a String); @entry_id is filled from the line only when it is
# not set yet.
# Raises ArgumentError when the line does not match.
def set_prediction(str)
  found = /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/.match(str)
  unless found
    raise ArgumentError,
          "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end

  @entry_id ||= found[1]
  @pred = found[2]
  @k = found[3]
end