| Class | Bio::SPTR |
| In: |
lib/bio/db/embl/sptr.rb
|
| Parent: | EMBLDB |
| dr | -> | embl_dr |
| Backup Bio::EMBLDB#dr as embl_dr | ||
returns contents in the CC lines.
returns an object of contents in the TOPIC.
returns contents of the "ALTERNATIVE PRODUCTS".
{'Event' => str,
'Named isoforms' => int,
'Comment' => str,
'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative splicing; Named isoforms=15;
...
CC placentae isoforms. All tissues differentially splice exon 13;
CC Name=A; Synonyms=no del;
CC IsoId=P15529-1; Sequence=Displayed;
returns contents of the "DATABASE".
[{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
returns contents of the "MASS SPECTROMETRY".
[{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
CC -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT CC IN LIVER, KIDNEY, LUNG AND BRAIN. CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK; CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
See also www.expasy.org/sprot/userman.html#CC_line
# File lib/bio/db/embl/sptr.rb, line 612
# Returns the contents of the CC (comment) lines.
#
#   cc          #=> {'TOPIC' => ['comment body', ...], ...}
#   cc(topic)   #=> the comment(s) filed under +topic+
#
# On first use the CC text is split on the "-!- " delimiter into
# "TOPIC: body" blocks; the resulting Hash is cached in @data['CC'].
#
# With a +topic+ the return value depends on the topic:
# * ALTERNATIVE PRODUCTS, BIOPHYSICOCHEMICAL PROPERTIES, CATALYTIC ACTIVITY,
#   CAUTION, INTERACTION, MASS SPECTROMETRY, PATHWAY, RNA EDITING,
#   SUBCELLULAR LOCATION and WEB RESOURCE are handed, unchanged, to the
#   matching cc_* helper and its result is returned.
# * DEVELOPMENTAL STAGE, DISEASE, ENZYME REGULATION, FUNCTION and INDUCTION
#   are joined into one String (nil when the entry has no such topic).
# * DATABASE is parsed into an Array of Hashes with the keys
#   'NAME', 'NOTE', 'WWW' and 'FTP' (nil when the entry has no such topic).
# * every other topic returns the raw Array of Strings, or nil.
#
# An entry without any CC text returns an empty Hash, whatever +topic+ is.
#
# Raises RuntimeError when a comment block does not look like "TOPIC: body"
# or when the CC text cannot be processed.
def cc(topic = nil)
  unless @data['CC']
    parsed = {}
    comment_border = '-' * (77 - 4 + 1)
    dlm = /-!- /

    # Some entries (e.g. 12KD_MYCSM) have no CC lines at all.
    return parsed if get('CC').size == 0

    # Drop the copyright statement. Non-destructive, so the String handed
    # back by fetch is never modified in place.
    cc_raw = fetch('CC').sub(/ *---.+---/m, '')

    # Nothing is left once the copyright statement is gone.
    return parsed if cc_raw == ''

    begin
      cc_raw = cc_raw.split(/#{comment_border}/)[0]
      cc_raw = cc_raw.sub(dlm, '')
      cc_raw.split(dlm).each do |tmp|
        tmp = tmp.strip

        if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
          key = $1
          body = $2
          # Re-join words that were hyphenated across a line break.
          body.gsub!(/- (?!AND)/, '-')
          body.strip!
          (parsed[key] ||= []).push(body)
        else
          raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
                 '', get('CC'), ''].join("\n")
        end
      end
    rescue NameError
      # NoMethodError is a subclass of NameError, so this single clause also
      # handles a nil that slipped through the parsing above.
      return {} if fetch('CC') == ''
      raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
             "\n'#{get('CC')}'\n", "(#{$!})"].join
    end

    @data['CC'] = parsed
  end

  return @data['CC'] if topic.nil?

  value = @data['CC'][topic]

  case topic
  when 'ALTERNATIVE PRODUCTS'
    cc_alternative_products(value)
  when 'BIOPHYSICOCHEMICAL PROPERTIES'
    cc_biophysiochemical_properties(value)
  when 'CATALYTIC ACTIVITY'
    cc_catalytic_activity(value)
  when 'CAUTION'
    cc_caution(value)
  when 'INTERACTION'
    cc_interaction(value)
  when 'MASS SPECTROMETRY'
    cc_mass_spectrometry(value)
  when 'PATHWAY'
    cc_pathway(value)
  when 'RNA EDITING'
    cc_rna_editing(value)
  when 'SUBCELLULAR LOCATION'
    cc_subcellular_location(value)
  when 'WEB RESOURCE'
    cc_web_resource(value)
  when 'DEVELOPMENTAL STAGE', 'DISEASE', 'ENZYME REGULATION',
       'FUNCTION', 'INDUCTION'
    # Free-text topics: one String, or nil when the topic is absent.
    value && value.join('')
  when 'DATABASE'
    # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
    return value unless value

    value.map do |line|
      record = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
      # Strip only a terminating period, never a meaningful last character.
      line.sub(/\.$/, '').split(/;/).each do |field|
        case field
        when /NAME=(.+)/
          record['NAME'] = $1
        when /NOTE=(.+)/
          record['NOTE'] = $1
        when /WWW="(.+)"/
          record['WWW'] = $1
        when /FTP="(.+)"/
          record['FTP'] = $1
        end
      end
      record
    end
  else
    value
  end
end
CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
# File lib/bio/db/embl/sptr.rb, line 924
# Parses the bodies of "WEB RESOURCE" comment blocks.
#
#   CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL="WWWAddress"].
#
# +data+ is an Array of comment bodies (Strings), or nil when the entry has
# no WEB RESOURCE topic.
#
# Returns an Array of Hashes with the keys 'NAME', 'NOTE' and 'URL' (a key
# stays nil when its field is missing; the URL is only recognised when it is
# enclosed in double quotes). Returns nil when +data+ is nil, so asking for a
# topic the entry does not have no longer raises NoMethodError.
def cc_web_resource(data)
  return data unless data

  data.map do |body|
    entry = {'NAME' => nil, 'NOTE' => nil, 'URL' => nil}
    body.split(';').each do |field|
      case field
      when /NAME=(.+)/
        entry['NAME'] = $1.strip
      when /NOTE=(.+)/
        entry['NOTE'] = $1.strip
      when /URL="(.+)"/
        entry['URL'] = $1.strip
      end
    end
    entry
  end
end
# File lib/bio/db/embl/sptr.rb, line 959
# Returns the database cross-references.
#
# Without +key+ the value of embl_dr is returned as is. With +key+ each
# row stored under that key in embl_dr is turned into a Hash; an unknown
# key gives an empty Array.
def dr(key = nil)
  return embl_dr unless key

  rows = embl_dr[key] || []
  rows.map do |fields|
    {
      'Accession' => fields[0],
      'Version' => fields[1],
      ' ' => fields[2],
      'Molecular Type' => fields[3]
    }
  end
end
returns a Hash of information in the DT lines.
hash keys: ['created', 'sequence', 'annotation'] also Symbols acceptable (ASAP): [:created, :sequence, :annotation]
returns a String of information in the DT lines by a given key..
DT DD-MMM-YYYY (rel. NN, Created) DT DD-MMM-YYYY (rel. NN, Last sequence update) DT DD-MMM-YYYY (rel. NN, Last annotation update)
# File lib/bio/db/embl/sptr.rb, line 123
# Returns the information in the DT (date) lines.
#
#   dt              #=> {'created' => str, 'sequence' => str, 'annotation' => str}
#   dt('created')   #=> str
#   dt(:created)    #=> str   (Symbol keys are converted with to_s)
#
# The first, second and third DT lines are stored under 'created',
# 'sequence' and 'annotation'; the leading line tag is removed and the text
# is stripped. A DT line that is missing from the entry yields an empty
# String instead of raising NoMethodError. The Hash is cached in @data['DT'].
def dt(key = nil)
  return dt[key.to_s] if key
  return @data['DT'] if @data['DT']

  part = get('DT').split(/\n/)
  dates = {}
  ['created', 'sequence', 'annotation'].each_with_index do |name, index|
    # to_s keeps entries with fewer than three DT lines from crashing.
    dates[name] = part[index].to_s.sub(/\w{2} /, '').strip
  end
  @data['DT'] = dates
end
returns a ENTRY_NAME in the ID line.
# File lib/bio/db/embl/sptr.rb, line 79
# Returns the ENTRY_NAME field of the ID line.
#
# Shortcut for id_line('ENTRY_NAME').
def entry_id
  id_line('ENTRY_NAME')
end
returns contents in the feature table.
sp = Bio::SPTR.new(entry)
ft = sp.ft
ft.class #=> Hash
ft.keys.each do |feature_key|
ft[feature_key].each do |feature|
feature['From'] #=> '1'
feature['To'] #=> '21'
feature['Description'] #=> ''
feature['FTId'] #=> ''
feature['diff'] #=> []
feature['original'] #=> [feature_key, '1', '21', '', '']
end
end
{FEATURE_KEY => [{'From' => int, 'To' => int,
'Description' => aStr, 'FTId' => aStr,
'diff' => [original_residues, changed_residues],
'original' => aAry }],...}
returns an Array of the information about the feature_name in the feature table.
[{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
Col Data item ----- ----------------- 1- 2 FT 6-13 Feature name 15-20 `FROM' endpoint 22-27 `TO' endpoint 35-75 Description (>=0 per key) ----- -----------------
Note: ‘FROM’ and ‘TO’ endpoints are allowed to use non-numerical characters including ’<’, ’>’ or ’?’. (c.f. ’<1’, ’?42’)
See also www.expasy.org/sprot/userman.html#FT_line
# File lib/bio/db/embl/sptr.rb, line 1024
# Returns the contents of the feature table (FT lines).
#
#   ft                #=> {FEATURE_KEY => [feature Hash, ...], ...}
#   ft(feature_key)   #=> [feature Hash, ...] (nil for an unknown key)
#
# Each feature Hash has the keys
# 'From' / 'To'::  Integer endpoints (the first non-digit such as '<', '>'
#                  or '?' is removed before conversion)
# 'Description'::  description text, continuation lines joined by a space
# 'FTId'::         feature identifier without "/FTId=" and the trailing '.'
# 'diff'::         [original_residues, changed_residues] for VARSPLIC,
#                  VARIANT, VAR_SEQ and CONFLICT features, [] otherwise
# 'original'::     [key, from, to, description, ftid] as read from the entry
#
# The parsed table is cached in @data['FT'].
#
# Raises RuntimeError ("Invalid FT Lines ...") when the lines cannot be
# parsed.
def ft(feature_key = nil)
  return ft[feature_key] if feature_key
  return @data['FT'] if @data['FT']

  table = []
  hash = {}
  begin
    get('FT').split("\n").each do |line|
      # The feature name starts at index 5, so a line that opens a feature
      # is "FT" followed by exactly three blanks and the name.
      if line =~ /^FT {3}\w/
        feature = line.chomp.ljust(74)
        table << [feature[5..12].strip,   # Feature name
                  feature[14..19].strip,  # From
                  feature[21..26].strip,  # To
                  feature[34..74].strip]  # Description
      else
        # Continuation line. sub (not sub!) so that a line without the
        # expected prefix is kept instead of becoming nil.
        table.last << line.chomp.sub(/^FT +/, '')
      end
    end

    # Join the description lines and split off a trailing /FTId= line.
    table = table.map do |feature|
      ftid = ''
      # Only a continuation line can be the FTId; the description itself
      # (index 3) is never popped.
      ftid = feature.pop if feature.size > 4 && feature.last =~ /FTId=/
      if feature.size > 4
        feature = [feature[0],
                   feature[1],
                   feature[2],
                   feature[3, feature.size - 3].join(' ')]
      end
      feature << ftid
    end

    table.each do |feature|
      entry = {
        # Removing '<', '>' or '?' in the FROM/TO endpoint.
        'From' => feature[1].sub(/\D/, '').to_i,
        'To' => feature[2].sub(/\D/, '').to_i,
        'Description' => feature[3],
        'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
        'diff' => [],
        'original' => feature
      }
      (hash[feature[0]] ||= []) << entry

      case feature[0]
      when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
        original_res = nil
        changed_res = nil
        case entry['Description']
        when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
          # Copy both captures before gsub overwrites the match data.
          original_res = $1
          changed_res = $2
          original_res = original_res.gsub(/ /, '').strip
          changed_res = changed_res.gsub(/ /, '').strip
        when /Missing/i
          original_res = seq.subseq(entry['From'], entry['To'])
          changed_res = ''
        end
        entry['diff'] = [original_res, changed_res]
      end
    end
  rescue
    raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{get('FT')}'\n"
  end

  @data['FT'] = hash
end
returns a Array of gene names in the GN line.
# File lib/bio/db/embl/sptr.rb, line 264
# Returns the gene names found in the GN line.
#
# When the parsed GN data is a list of gene record Hashes, the :name of every
# record is collected; otherwise the first element of the parsed data is
# returned unchanged.
def gene_names
  gn # fills @data['GN'] if that has not been done yet
  records = @data['GN']
  head = records.first
  if head.class == Hash
    records.collect { |record| record[:name] }
  else
    head
  end
end
returns gene names in the GN line.
where <gene record> is:
{ :name => '...',
:synonyms => [ 's1', 's2', ... ],
:loci => [ 'l1', 'l2', ... ],
:orfs => [ 'o1', 'o2', ... ]
}
Old format:
# File lib/bio/db/embl/sptr.rb, line 188
# Returns the parsed contents of the GN line, cached in @data['GN'].
#
# GN text containing "Name=" or "ORFNames=" is handed to gn_uniprot_parser;
# anything else goes to gn_old_parser.
def gn
  return @data['GN'] if @data['GN']

  text = fetch('GN')
  uniprot_style = [/Name=/, /ORFNames=/].any? { |pattern| pattern === text }
  @data['GN'] = uniprot_style ? gn_uniprot_parser : gn_old_parser
end
Bio::SPTR#hi #=> hash
# File lib/bio/db/embl/sptr.rb, line 528
# Returns the parsed HI lines as an Array of Hashes, cached in @data['HI'].
#
# The HI text is split on ". "; each piece is expected to look like
# "Category: keyword; keyword; ...". For every piece a Hash is built with
# 'Category'::  the text before ": "
# 'Keywords'::  all keywords except the last one
# 'Keyword'::   the last keyword, without a trailing period
def hi
  return @data['HI'] if @data['HI']

  @data['HI'] = []
  fetch('HI').split(/\. /).each do |chunk|
    category, keyword_text = chunk.split(': ')
    keywords = keyword_text.split('; ')
    keyword = keywords.pop
    keyword.sub!(/\.$/, '')
    @data['HI'] << {'Category' => category, 'Keywords' => keywords, 'Keyword' => keyword}
  end
  @data['HI']
end
returns a Hash of the ID line.
returns the content (Int or String) of the ID line for a given key. Hash keys: [‘ENTRY_NAME’, ‘DATA_CLASS’, ‘MOLECULE_TYPE’, ‘SEQUENCE_LENGTH’]
ID P53_HUMAN STANDARD; PRT; 393 AA.
#"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
"SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
# File lib/bio/db/embl/sptr.rb, line 63
# Returns the contents of the ID line.
#
#   id_line                 #=> {'ENTRY_NAME' => str, 'DATA_CLASS' => str,
#                           #    'MOLECULE_TYPE' => str or nil,
#                           #    'SEQUENCE_LENGTH' => int}
#   id_line('ENTRY_NAME')   #=> str
#
# Two layouts of the line are understood:
#
#   ID   P53_HUMAN      STANDARD;      PRT;   393 AA.
#   ID   P53_HUMAN               Reviewed;         393 AA.
#
# The second layout has no molecule type field, so 'MOLECULE_TYPE' is nil
# for it. The Hash is cached in @data['ID'].
def id_line(key = nil)
  return id_line[key] if key
  return @data['ID'] if @data['ID']

  part = @orig['ID'].split(/ +/)
  if part[4].to_s.chomp == 'AA.'
    # No molecule type field: the length directly follows the data class.
    molecule_type = nil
    length = part[3].to_i
  else
    molecule_type = part[3].sub(/;/, '')
    length = part[4].to_i
  end

  @data['ID'] = {
    'ENTRY_NAME' => part[1],
    'DATA_CLASS' => part[2].sub(/;/, ''),
    'MOLECULE_TYPE' => molecule_type,
    'SEQUENCE_LENGTH' => length
  }
end
returns a MOLECULE_TYPE in the ID line.
A short-cut for Bio::SPTR#id_line(‘MOLECULE_TYPE’).
# File lib/bio/db/embl/sptr.rb, line 89
# Returns the MOLECULE_TYPE field of the ID line.
#
# Shortcut for id_line('MOLECULE_TYPE').
def molecule
  id_line('MOLECULE_TYPE')
end
OH NCBI_TaxID=TaxID; HostName. br.expasy.org/sprot/userman.html#OH_line
# File lib/bio/db/embl/sptr.rb, line 358
# Returns the parsed OH (organism host) line, cached in @data['OH'].
#
#   OH   NCBI_TaxID=TaxID; HostName.
#
# The OH text is split on ". "; every piece becomes a Hash
# {'NCBI_TaxID' => str, 'HostName' => str or nil}. The host name loses its
# trailing period and is nil when nothing follows the taxonomy id.
#
# Raises ArgumentError when a piece has no "NCBI_TaxID=<digits>;" part.
def oh
  return @data['OH'] if @data['OH']

  @data['OH'] = fetch('OH').split("\. ").map do |host|
    taxid = host[/NCBI_TaxID=(\d+);/, 1]
    unless taxid
      raise ArgumentError, ["Error: Invalid OH line format (#{entry_id}):",
                            $!, "\n", get('OH'), "\n"].join
    end

    host_name = host[/NCBI_TaxID=\d+; (.+)/, 1]
    host_name.sub!(/\.$/, '') if host_name

    {'NCBI_TaxID' => taxid, 'HostName' => host_name}
  end
end
returns an Array of Hashes, or a String of the OS line when a key is given.
[{'name' => '(Human)', 'os' => 'Homo sapiens'},
{'name' => '(Rat)', 'os' => 'Rattus norvegicus'}]
{'name' => "(Human)", 'os' => 'Homo sapiens'}
OS Genus species (name). OS Genus species (name0) (name1). OS Genus species (name0) (name1). OS Genus species (name0), G s0 (name0), and G s (name0) (name1). OS Homo sapiens (Human), and Rattus norvegicus (Rat) OS Hippotis sp. Clark and Watts 825. OS unknown cyperaceous sp.
# File lib/bio/db/embl/sptr.rb, line 297
# Returns the organisms named in the OS line.
#
#   os        #=> [{'name' => '(Human)', 'os' => 'Homo sapiens'}, ...]
#   os(num)   #=> "Homo sapiens (Human)" for the num-th organism
#
# The OS text is split on ", and" / ", "; for each piece the organism name
# and the parenthesised part are extracted ('name' is nil when there are no
# parentheses). The Array is cached in @data['OS'].
#
# Raises RuntimeError when a piece does not contain an organism name.
def os(num = nil)
  unless @data['OS']
    @data['OS'] = fetch('OS').split(/, and|, /).map do |chunk|
      unless chunk =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
      organism = $1
      chunk =~ /(\(.+\))/
      {'name' => $1, 'os' => organism}
    end
  end

  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  record = @data['OS'][num]
  "#{record['os']} #{record['name']}"
end
returns a Hash of organism taxonomy cross-references.
{'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
OX NCBI_TaxID=1234; OX NCBI_TaxID=1234, 2345, 3456, 4567;
# File lib/bio/db/embl/sptr.rb, line 341
# Returns the taxonomy cross-references of the OX line, cached in
# @data['OX'].
#
#   OX   NCBI_TaxID=1234, 2345, 3456, 4567;
#   ox   #=> {'NCBI_TaxID' => ['1234', '2345', '3456', '4567']}
#
# The text is split on ';' into "database=ids" pairs and the ids are split
# on commas.
def ox
  return @data['OX'] if @data['OX']

  xrefs = {}
  fetch('OX').sub(/\.$/, '').split(/;/).each do |token|
    db, refs = token.strip.split(/=/)
    xrefs[db] = refs.split(/, */)
  end
  @data['OX'] = xrefs
end
returns the proposed official name of the protein.
"DE #{OFFICIAL_NAME} (#{SYNONYM})"
"DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]."
OFFICIAL_NAME 1/entry
SYNONYM >=0
CONTAINS >=0
# File lib/bio/db/embl/sptr.rb, line 144
# Returns the proposed official name of the protein from the DE line.
#
# The name is the text before the first '(' of the part preceding the first
# '[' (the "contains" section). ' (Fragment)' is appended when that part
# mentions "fragment" in any letter case. Returns "" when fetch('DE') gives
# nothing.
def protein_name
  de_line = fetch('DE')
  return "" unless de_line

  head = de_line[/^[^\[]*/] # everything preceding the first [
  name = head[/^[^(]*/].strip
  name << ' (Fragment)' if head =~ /fragment/i
  name
end
returns contents in the R lines.
where <reference information Hash> is:
{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
R Lines
# File lib/bio/db/embl/sptr.rb, line 394
# Returns the contents of the R (reference) lines, cached in @data['R'].
#
# The R text is cut into one record per reference at every "\nRN ". Within
# a record the text of each line type (RN, RC, RP, RX, RA, RT, RL, RG) is
# concatenated, each line followed by a blank, and then handed to the
# matching set_XX method. The result is an Array with one Hash per
# reference, keyed by line type.
#
# Raises RuntimeError when a line is not a recognised R line.
def ref
  return @data['R'] if @data['R']

  @data['R'] = get('R').split(/\nRN /).map do |record|
    fields = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
              'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
    # Splitting removed the tag from every record but the first.
    record = 'RN ' + record unless /^RN / =~ record

    record.split("\n").each do |line|
      unless /^(R[NPXARLCTG]) (.+)/ =~ line
        raise "Invalid format in R lines, \n[#{line}]\n"
      end
      fields[$1] += $2 + ' '
    end

    fields['RN'] = set_RN(fields['RN'])
    fields['RC'] = set_RC(fields['RC'])
    fields['RP'] = set_RP(fields['RP'])
    fields['RX'] = set_RX(fields['RX'])
    fields['RA'] = set_RA(fields['RA'])
    fields['RT'] = set_RT(fields['RT'])
    fields['RL'] = set_RL(fields['RL'])
    fields['RG'] = set_RG(fields['RG'])

    fields
  end
end
returns Bio::Reference object from Bio::EMBLDB::Common#ref.
# File lib/bio/db/embl/sptr.rb, line 488
# Returns the references of the entry as a References object built from
# Reference objects, cached in @data['references'].
#
# Every Hash returned by ref is mapped as follows:
# 'RA':: split on ", " into 'authors'
# 'RT':: 'title'
# 'RL':: "Journal VOL (ISSUE), PAGES (YEAR)" is split into 'journal',
#        'volume', 'issue', 'pages' and 'year'; any other text is stored
#        whole as 'journal'
# 'RX':: every tag/xref pair is stored under the downcased tag
#        (PUBMED, MEDLINE, DOI)
def references
  return @data['references'] if @data['references']

  entries = ref.map do |ent|
    attrs = Hash.new('')
    ent.each do |key, value|
      if key == 'RA'
        attrs['authors'] = value.split(/, /)
      elsif key == 'RT'
        attrs['title'] = value
      elsif key == 'RL'
        if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
          attrs['journal'] = $1
          attrs['volume'] = $2
          attrs['issue'] = $3
          attrs['pages'] = $4
          attrs['year'] = $5
        else
          attrs['journal'] = value
        end
      elsif key == 'RX' # PUBMED, MEDLINE, DOI
        value.each do |tag, xref|
          attrs[tag.downcase] = xref
        end
      end
    end
    Reference.new(attrs)
  end

  @data['references'] = References.new(entries)
end
returns a Bio::Sequence::AA of the amino acid sequence.
blank Line; sequence data (>=1)
# File lib/bio/db/embl/sptr.rb, line 1134
# Returns the amino acid sequence as a Sequence::AA, cached in @data[''].
#
# Blanks and digit runs are removed from the sequence block before the
# object is created.
def seq
  @data[''] ||= Sequence::AA.new(fetch('').gsub(/ |\d+/, ''))
end
returns a SEQUENCE_LENGTH in the ID line.
A short-cut for Bio::SPTR#id_line(‘SEQUENCE_LENGTH’).
# File lib/bio/db/embl/sptr.rb, line 98
# Returns the SEQUENCE_LENGTH field of the ID line.
#
# Shortcut for id_line('SEQUENCE_LENGTH').
def sequence_length
  id_line('SEQUENCE_LENGTH')
end
returns a Hash of the contents of the SQ lines.
returns a value of a key given in the SQ lines.
'CRC64']
SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64; SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64;
MW, Dalton unit. CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
# File lib/bio/db/embl/sptr.rb, line 1106
# Returns the contents of the SQ line.
#
#   sq        #=> {'aalen' => int, 'MW' => int, 'CRC64' => str}
#   sq(key)   #=> the value selected by +key+
#
# A key matching /mw/, /molecular/ or /weight/ selects 'MW'; a key matching
# /len/, /length/ or /AA/ selects 'aalen'; any other key is looked up in the
# Hash directly. The Hash is cached in @data['SQ'].
#
# Raises RuntimeError when the SQ line does not have the expected form.
def sq(key = nil)
  unless @data['SQ']
    unless fetch('SQ') =~ /(\d+) AA\; (\d+) MW; (.+) CRC64;/
      raise "Invalid SQ Line: \n'#{fetch('SQ')}'"
    end
    @data['SQ'] = { 'aalen' => $1.to_i, 'MW' => $2.to_i, 'CRC64' => $3 }
  end

  info = @data['SQ']
  return info unless key

  field = case key
          when /mw/, /molecular/, /weight/ then 'MW'
          when /len/, /length/, /AA/ then 'aalen'
          else key
          end
  info[field]
end
returns an array of synonyms (unofficial names).
synonyms are each placed in () following the official name on the DE line.
# File lib/bio/db/embl/sptr.rb, line 158
# Returns an Array of synonyms (unofficial names) from the DE line.
#
# Synonyms are the parenthesised parts that follow the official name. The
# text between '[' and ']' (the "contains" section) is ignored, and so is
# any parenthesised part mentioning "fragment". Returns an empty Array when
# fetch('DE') gives nothing.
def synonyms
  de_line = fetch('DE')
  return Array.new unless de_line

  trimmed = de_line.sub(/\[.*\]/, '') # drop the "contains" part
  candidates = trimmed.scan(/\([^)]+/).reject { |text| text =~ /fragment/i }
  candidates.map { |text| text[1..-1].strip } # [1..-1] removes the leading (
end