| Class | Bio::GCG::Msf |
| In: |
lib/bio/appl/gcg/msf.rb
|
| Parent: | Object |
MSF is a multiple sequence alignment format from the Wisconsin Package (GCG). Bio::GCG::Msf is a parser for the MSF format.
| DELIMITER | = | RS = nil | delimiter used by Bio::FlatFile |
# File lib/bio/appl/gcg/msf.rb, line 31
# Creates a new Msf object from the text of a single MSF-format entry.
#
# The text is divided into:
# * an optional heading line ('!!NA_MULTIPLE_ALIGNMENT 1.0' or like this),
# * the description block (kept in @description),
# * the header line ending in "..", from which @entry_id, @length,
#   @seq_type, @date and @checksum are taken,
# * the per-sequence "Name:" lines (kept in @seq_info as hashes), and
# * the alignment data following the "//" divider (kept unparsed in @data).
#
# ---
# *Arguments*:
# * (required) _str_: String containing the MSF entry
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')

  # Optional heading line. Leading newlines were stripped above, so the
  # heading, when present, is at the very start of the text.
  if /\A!![A-Z]+_MULTIPLE_ALIGNMENT/ =~ str
    @heading = str[/.*/] # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this
    str.sub!(/.*/, '')
  end

  # Find the "//" divider line first. The header search below is greedy and
  # must not run into the alignment data, where lines frequently end in
  # ".." (gap characters).
  divider = str.index(/^\/\/$/)
  preamble = divider ? str[0...divider] : str

  # Everything up to and including the last line ending in ".." inside the
  # preamble. When it matches, it always starts at the beginning of the text.
  head = preamble[/.*\.\.$/m].to_s
  header_line = head[/^.*\.\.$/].to_s
  @description = head.sub(/^.*\.\.$/, '').sub(/\A(\r\n|\r|\n)/, '')

  m = /(.+)\s+MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(header_line)
  if m
    @entry_id = m[1].to_s.strip
    @length = m[2].to_i
    @seq_type = m[3]
    @date = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  # "Name:" lines sit between the header line and the divider. Without a
  # divider no sequence information is collected.
  names_part = divider ? str[head.length...divider] : ''
  @seq_info = []
  names_part.each_line do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  # Raw alignment text: whatever follows the "//" divider, or, when there is
  # no divider, whatever follows the header.
  @data = divider ? str[(divider + 2)..-1] : str[head.length..-1]
  @align = nil
end
Returns a Bio::Alignment object.
# File lib/bio/appl/gcg/msf.rb, line 179
# Returns the alignment held in @align.
# do_parse is called first so that @align has been filled in.
def alignment
  do_parse
  @align
end
CompCheck field
# File lib/bio/appl/gcg/msf.rb, line 122
# Returns the CompCheck field found in the description as an Integer,
# or nil when the description has no such field.
# The result (including nil) is cached in @compcheck after the first call.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
gap length weight
# File lib/bio/appl/gcg/msf.rb, line 113
# Returns the GapLengthWeight field found in the description as a String,
# or nil when the description has no such field.
# The result (including nil) is cached in @gap_length_weight.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end
gap weight
# File lib/bio/appl/gcg/msf.rb, line 104
# Returns the GapWeight field found in the description as a String,
# or nil when the description has no such field.
# The result (including nil) is cached in @gap_weight.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end
Gets the sequence data (used internally; will be made obsolete).
# File lib/bio/appl/gcg/msf.rb, line 185
# Returns the sequence data held in @seq_data.
# do_parse is called first so that @seq_data has been filled in.
# Used internally; will be obsoleted.
def seq_data
  do_parse
  @seq_data
end
symbol comparison table
# File lib/bio/appl/gcg/msf.rb, line 95
# Returns the "Symbol comparison table" field found in the description as a
# String, or nil when the description has no such field.
# The result (including nil) is cached in @symbol_comparison_table.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end
validates checksum
# File lib/bio/appl/gcg/msf.rb, line 191
# Validates the checksums of the entry.
#
# Each parsed sequence is checksummed with Bio::GCG::Seq.calc_checksum and
# compared with the "Check" value of the matching "Name:" line. The sum of
# the per-sequence checksums, modulo 10000, is then compared with the
# header "Check:" value, unless that value is 0.
#
# ---
# *Returns*:: true when all checked values agree, false otherwise. A sequence
#             with no matching "Name:" line counts as a mismatch.
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, i|
    sum = Bio::GCG::Seq.calc_checksum(seq)
    info = @seq_info[i]
    # Fewer "Name:" lines than sequences: nothing to compare against.
    return false unless info
    return false unless sum == info['Check'].to_i
    total += sum
  end
  # "Check:" field of BioPerl is always 0, so a 0 total is not compared.
  return true if @checksum == 0
  (total % 10000) == @checksum
end