| Class | Bio::Iprscan::Report |
| In: |
lib/bio/appl/iprscan/report.rb
|
| Parent: | Object |
Class for InterProScan report. It is used to parse results and reformat results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.
See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
# Read a merged.txt and split each entry.
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
report.query_id
report.matches.size
report.matches.each do |match|
match.ipr_id #=> 'IPR...'
match.ipr_description
match.method_name
match.accession
match.description
match.match_start
match.match_end
match.evalue
end
# report.to_gff3
# report.to_html
end
Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
report.class #=> Bio::Iprscan::Report
end
| RS | = | DELIMITER = "\n\/\/\n" | Entry delimiter pattern. |
| query_id | -> | entry_id |
| crc64 | [RW] | CRC64 checksum of query sequence. |
| matches | [RW] | Matched InterPro motifs as an Array of Match objects. Depending on the input format, each Match carries attributes such as :ipr_id, :ipr_description, :method, :accession, :description, :evalue, :match_start and :match_end. |
| query_id | [RW] | Query sequence name (entry_id). |
| query_length | [RW] | Query sequence length. |
# File lib/bio/appl/iprscan/report.rb, line 236
# Builds an empty report: query_id, query_length and crc64 start as nil and
# the match list starts as an empty Array.
236: def initialize
237: @query_id = nil
238: @query_length = nil
239: @crc64 = nil
240: @matches = []
241: end
Splits entry stream.
Bio::Iprscan::Report.parse_ptxt(File.open("merged.txt")) do |report|
report
end
# File lib/bio/appl/iprscan/report.rb, line 194
# Splits a pseudo-txt stream into entries and yields one report per entry.
#
# +io+ must respond to +each(separator)+ (for example an open File).
# Every chunk terminated by the entry delimiter RS is handed to
# parse_ptxt_entry and the resulting report is yielded to the block.
# Chunks holding only whitespace (such as a newline left after the final
# delimiter) are skipped so that no empty report is yielded for them.
def self.parse_ptxt(io)
  io.each(RS) do |entry|
    next if entry.strip.empty?
    yield parse_ptxt_entry(entry)
  end
end
Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report object.
File.read("merged.txt").each_line(Bio::Iprscan::Report::RS) do |e|
report = Bio::Iprscan::Report.parse_ptxt_entry(e)
end
# File lib/bio/appl/iprscan/report.rb, line 209
# Parses one pseudo-txt entry and returns a new report.
#
# +str+ is read line by line; each line is split on tabs and dispatched on
# its shape:
# * blank lines are ignored,
# * a two-column line supplies query_id and query_length,
# * a "//" line (entry terminator) is ignored,
# * an "InterPro" line becomes the InterPro context for following matches,
# * any other line is a match whose fifth column is a "start-end" location.
#
# Raises ArgumentError when a match line has no location column; such a
# line previously failed with NoMethodError on nil.
def self.parse_ptxt_entry(str)
  report = new
  # Indexing an empty Array yields nil, so matches seen before any InterPro
  # line get a nil ipr_id / ipr_description.
  ipr_line = []
  str.split(/\n/).each do |line|
    next if line.strip.empty?

    fields = line.split("\t")
    if fields.size == 2
      report.query_id = fields[0]
      report.query_length = fields[1].to_i
    elsif fields.first == '//'
      # Entry terminator: nothing to record.
    elsif fields.first == 'InterPro'
      ipr_line = fields
    else
      location = fields[4]
      raise ArgumentError, "Invalid format: \n\n#{line}" unless location

      startp, endp = location.split('-')
      report.matches << Match.new(:ipr_id => ipr_line[1],
                                  :ipr_description => ipr_line[2],
                                  :method => fields[0],
                                  :accession => fields[1],
                                  :description => fields[2],
                                  :evalue => fields[3],
                                  :match_start => startp.to_i,
                                  :match_end => endp.to_i)
    end
  end
  report
end
Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
report
end
# File lib/bio/appl/iprscan/report.rb, line 72
# Splits a raw-format stream into entries and yields one report per entry.
#
# +io+ must respond to +gets+. Consecutive lines that share the same first
# tab-separated column (the query id) are collected into one entry; when
# the id changes, the collected text is parsed with parse_raw_entry and the
# report is yielded. The final entry is yielded at end of input.
#
# The id of the current entry is remembered from its first line rather than
# re-split from the whole accumulated entry on every line, so the work per
# line no longer grows with the entry's size. This differs from the old
# comparison only when an entry's first line contains no tab at all.
def self.parse_raw(io)
  entry = ''
  current_id = nil
  while (line = io.gets)
    line_id = line.split("\t").first
    if entry.empty?
      entry = line
      current_id = line_id
    elsif line_id == current_id
      entry << line
    else
      yield Bio::Iprscan::Report.parse_raw_entry(entry)
      entry = line
      current_id = line_id
    end
  end
  yield Bio::Iprscan::Report.parse_raw_entry(entry) unless entry.empty?
end
Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb, line 89
# Parses one raw-format entry (all lines of a single query) into a report.
#
# Each non-blank line is split on tabs into a Match: columns 0-10 are
# query_id, crc64, query_length, method, accession, description,
# match_start, match_end, evalue, status and date. Columns 11 and 12, when
# present, are the InterPro id and description; column 13, when present,
# is scanned for GO terms, which are stored as an Array of Strings.
#
# The report's query_id, query_length and crc64 are copied from the first
# match. crc64 was previously left nil even though it is parsed here and
# written out by format_raw. An entry without any match lines returns a
# report whose query attributes stay nil instead of failing on nil.
def self.parse_raw_entry(str)
  report = new
  str.split(/\n/).each do |line|
    next if line.strip.empty?

    fields = line.split("\t")
    match = Match.new(:query_id => fields[0],
                      :crc64 => fields[1],
                      :query_length => fields[2].to_i,
                      :method => fields[3],
                      :accession => fields[4],
                      :description => fields[5],
                      :match_start => fields[6].to_i,
                      :match_end => fields[7].to_i,
                      :evalue => fields[8],
                      :status => fields[9],
                      :date => fields[10])
    if fields[11]
      match.ipr_id = fields[11]
      match.ipr_description = fields[12]
    end
    match.go_terms = fields[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if fields[13]
    report.matches << match
  end

  first_match = report.matches.first
  if first_match
    report.query_id = first_match.query_id
    report.query_length = first_match.query_length
    report.crc64 = first_match.crc64
  end
  report
end
Splits the entry stream.
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
report.class #=> Bio::Iprscan::Report
end
# File lib/bio/appl/iprscan/report.rb, line 130
# Splits a txt-format stream into entries and yields one report per entry.
#
# +io+ must respond to +each(separator)+. The stream is cut at every
# "\n\nSequence", so each chunk except the last ends with the word
# "Sequence" that belongs to the next entry, and each chunk except the
# first has lost its own leading "Sequence". Both are repaired before the
# chunk is passed to parse_txt_entry.
#
# The patterns are anchored to the whole chunk (\A / \z) rather than to
# line boundaries (^ / $): with line anchors, an interior line that merely
# ends with "Sequence" (for instance an InterPro description) was stripped
# instead of the trailing separator word.
def self.parse_txt(io)
  io.each("\n\nSequence") do |entry|
    entry = entry.sub(/Sequence\z/, '')
    entry = 'Sequence' + entry unless entry =~ /\A\s*Sequence/
    yield parse_txt_entry(entry)
  end
end
Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb, line 147
# Parses one txt-format entry into a report.
#
# +str+ must contain a line starting with "Sequence "; otherwise
# ArgumentError ("Invalid format: ...") is raised. The text is split on
# blank lines: the first chunk is the header, from which query_id,
# query_length and crc64 are extracted by regular expression (query_id
# falls back to '' and the other two to nil when the pattern is absent).
# Every remaining chunk is processed line by line, dispatching on the first
# column:
# * "method" lines are ignored,
# * a line naming a GO category (Molecular Function, Cellular Component or
#   Biological Process) replaces the current GO annotation,
# * an "InterPro" line becomes the InterPro context for following matches,
# * any other line produces one Match per status/position/score group
#   found in its fourth column.
# Returns the populated report.
#
# NOTE(review): columns are split on / +/ as shown in this listing; the
# whitespace inside that pattern may have been collapsed when the page was
# rendered -- confirm against the source file before relying on it.
147: def self.parse_txt_entry(str)
148: unless str =~ /^Sequence /
149: raise ArgumentError, "Invalid format: \n\n#{str}"
150: end
151: header, *matches = str.split(/\n\n/)
152: report = self.new
153: report.query_id = if header =~ /Sequence \"(.+)\" / then $1 else '' end
154: report.query_length = if header =~ /length: (\d+) aa./ then $1.to_i else nil end
155: report.crc64 = if header =~ /crc64 checksum: (\S+) / then $1 else nil end
156: ipr_line = ''
157: go_annotation = ''
158: matches.each do |m|
159: m = m.split(/\n/).map {|x| x.split(/ +/) }
160: m.each do |match|
161: case match[0]
162: when 'method'
163: when /(Molecular Function|Cellular Component|Biological Process):/
164: go_annotation = match[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
165: when 'InterPro'
166: ipr_line = match
167: else
168: pos_scores = match[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */)
169: pos_scores.each do |pos_score|
170: report.matches << Match.new(:ipr_id => ipr_line[1],
171: :ipr_description => ipr_line[2],
172: :method => match[0],
173: :accession => match[1],
174: :description => match[2],
175: :evalue => pos_score[3],
176: :status => pos_score[0],
177: :match_start => pos_score[1].to_i,
178: :match_end => pos_score[2].to_i,
179: :go_terms => go_annotation)
180: end
181: end
182: end
183: end
184: return report
185: end
# Stub with an empty body in this listing; calling it returns nil.
def format_txt end
# File lib/bio/appl/iprscan/report.rb, line 266
# Renders the report in raw format and returns it as a String.
#
# Each match becomes one line of tab-separated columns: query_id, crc64,
# query_length, method_name, accession, description, match_start,
# match_end, evalue, status, date, ipr_id, ipr_description and the GO
# terms joined by ", ". Lines are joined with "\n" (no trailing newline).
#
# GO terms are accepted in both shapes the parsers produce: triples of
# [category, description, id] (from parse_txt_entry) are rendered as
# "category: description (id)", while ready-made Strings (from
# parse_raw_entry) are emitted unchanged. A match without GO terms yields
# an empty last column instead of failing on nil.
def format_raw
  @matches.map { |match|
    go_terms = Array(match.go_terms).map do |term|
      term.is_a?(Array) ? "#{term[0]}: #{term[1]} (#{term[2]})" : term.to_s
    end

    [query_id,
     crc64,
     query_length,
     match.method_name,
     match.accession,
     match.description,
     match.match_start,
     match.match_end,
     match.evalue,
     match.status,
     match.date,
     match.ipr_id,
     match.ipr_description,
     go_terms.join(', ')].join("\t")
  }.join("\n")
end
Output interpro matches in the format_type.
# File lib/bio/appl/iprscan/report.rb, line 245
# Returns the matches rendered in the requested format.
#
# Only 'raw' / :raw is handled in this listing, delegating to format_raw;
# any other +format_type+ raises NameError ("Invalid format_type.").
245: def output(format_type)
246: case format_type
247: when 'raw', :raw
248: format_raw
249: else
250: raise NameError, "Invalid format_type."
251: end
252: end
Returns a Hash (key as an InterPro ID and value as an Array of Match objects).
report.to_hash.each do |ipr_id, matches|
matches.each do |match|
match.ipr_id == ipr_id #=> true
end
end
# File lib/bio/appl/iprscan/report.rb, line 298
# Returns a Hash mapping each InterPro id to the Array of matches that
# carry it, preserving the order of the match list.
#
# The Hash is built on the first call and cached in @ipr_ids; later calls
# return the same object, so matches added afterwards are not reflected.
def to_hash
  @ipr_ids ||= @matches.group_by { |match| match.ipr_id }
end