| Class | Bio::FlatFileIndex::Flat_1::FlatMappingFile |
| In: |
lib/bio/io/flatfile/index.rb
|
| Parent: | Object |
FlatMappingFile class.
Internal use only.
| filename | [R] | |
| mode | [RW] |
# File lib/bio/io/flatfile/index.rb, line 923
# Builds a Proc that merges already-sorted files using an external
# sort command.
#
# sort_program:: the command, given as an array of arguments, that is
#                placed in front of the sort options. The default runs
#                /usr/bin/sort through /usr/bin/env with LC_ALL=C.
#
# The returned Proc takes (out, in1, *files). It runs the command with
# "-m -o out in1 files..." appended and returns whatever Kernel#system
# returns. in1 and every entry of files must already be sorted.
def self.external_merge_proc(sort_program = [ '/usr/bin/env',
                                              'LC_ALL=C',
                                              '/usr/bin/sort' ])
  Proc.new do |out, in1, *files|
    merge_args = [ '-m', '-o', out, in1 ] + files
    system(*(sort_program + merge_args))
  end
end
# File lib/bio/io/flatfile/index.rb, line 900
# Builds a Proc that sorts each extra input file separately with an
# external sort command and then merges the results with in1.
#
# sort_program:: the command, given as an array of arguments, that is
#                placed in front of the sort options. The default runs
#                /usr/bin/sort through /usr/bin/env with LC_ALL=C.
#
# The returned Proc takes (out, in1, *files). Each entry of files is
# sorted into its own temporary file ("-o tmp file"); in1 and the
# temporary files are then combined with "-m -o out". in1 is handed
# to the merge step unsorted-by-us, so it must already be sorted.
#
# The temporary files are closed and unlinked even when a step raises.
# As before, the Proc returns the array of (by then removed) Tempfile
# objects; the exit status of the sort commands is not inspected.
def self.external_merge_sort_proc(sort_program = [ '/usr/bin/env',
                                                   'LC_ALL=C',
                                                   '/usr/bin/sort' ])
  # The Proc needs Tempfile whenever it runs; load it here so the Proc
  # does not depend on some other method having required it first.
  require 'tempfile'

  Proc.new do |out, in1, *files|
    tf_all = []
    begin
      files.each do |fn|
        tf = Tempfile.open('sort')
        # Register before anything else can fail so that the ensure
        # clause below always sees this temporary file.
        tf_all << tf
        tf.close(false)
        system(*(sort_program + [ '-o', tf.path, fn ]))
      end
      tfn_all = tf_all.collect { |tf| tf.path }
      system(*(sort_program + [ '-m', '-o', out, in1, *tfn_all ]))
      tf_all
    ensure
      tf_all.each { |tf| tf.close(true) }
    end
  end
end
# File lib/bio/io/flatfile/index.rb, line 891
# Builds a Proc that sorts all input files in one run of an external
# sort command.
#
# sort_program:: the command, given as an array of arguments, that is
#                placed in front of the sort options. The default runs
#                /usr/bin/sort through /usr/bin/env with LC_ALL=C.
#
# The returned Proc takes (out, in1, *files), runs the command with
# "-o out in1 files..." appended, and returns whatever Kernel#system
# returns.
def self.external_sort_proc(sort_program = [ '/usr/bin/env',
                                             'LC_ALL=C',
                                             '/usr/bin/sort' ])
  Proc.new do |out, in1, *files|
    sort_args = [ '-o', out, in1 ] + files
    system(*(sort_program + sort_args))
  end
end
# File lib/bio/io/flatfile/index.rb, line 933
# Builds a Proc that sorts the input files in memory, without running
# an external program.
#
# The returned Proc takes (out, in1, *files). It reads every line of
# in1 and of each entry of files, sorts the lines with Array#sort!
# (plain String comparison) and writes them to out. It returns nil.
#
# A final line that lacks its newline is given one before sorting;
# otherwise it would be written directly in front of the next line and
# the two records would run together in the output.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    lines = []
    ([ in1 ] + files).each do |fn|
      IO.foreach(fn) do |line|
        lines << (line.end_with?("\n") ? line : line + "\n")
      end
    end
    lines.sort!
    # Block form closes the output file even if a write fails.
    File.open(out, 'w') do |of|
      lines.each { |line| of << line }
    end
    nil
  end
end
# File lib/bio/io/flatfile/index.rb, line 734
# Remembers which file to use and in which mode; nothing is opened
# here. The file handle, the record size and the record count all
# start out as nil and are filled in later by other methods.
#
# filename:: path of the mapping file
# mode::     mode string later passed to File.open (default 'rb')
def initialize(filename, mode = 'rb')
  @filename, @mode = filename, mode
  @file = nil
  @record_size = nil
  @records = nil
end
# File lib/bio/io/flatfile/index.rb, line 806
# Appends one record at the end of the file and increments the cached
# record count. Returns the new count.
#
# str:: record contents, handed unchanged to write_record
def add_record(str)
  # Called only for their side effects: both values are looked up (and
  # cached by those methods) before @file and @records are used below.
  records
  record_size

  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records += 1
end
# File lib/bio/io/flatfile/index.rb, line 755
# Closes the underlying file if it is open and forgets the handle.
# Does nothing when no file is open. Always returns nil.
def close
  return nil unless @file

  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
end
export/import/edit data
# File lib/bio/io/flatfile/index.rb, line 841
# Yields every record of the file, in file order, wrapped in a Record
# object. Returns self.
def each
  total = records
  seek(0)
  total.times do |index|
    yield Record.new(get_record(index))
  end
  self
end
# File lib/bio/io/flatfile/index.rb, line 850
# Writes every record to +stream+, one per line: the record's to_s
# followed by a newline, appended with <<. Returns +stream+.
def export_tsv(stream)
  each do |record|
    stream << "#{record}\n"
  end
  stream
end
# File lib/bio/io/flatfile/index.rb, line 776
# Reads the record at index +i+ and returns it as a string of
# record_size bytes, or whatever IO#read returns at that position.
def get_record(i)
  # record_size has to be looked up before @file is touched.
  length = record_size
  seek(i)
  @file.read(length)
end
# File lib/bio/io/flatfile/index.rb, line 948
# Rebuilds this mapping file from sorted TSV data.
#
# flag_primary:: passed on to init_with_sorted_tsv_file
# mode::         when :new, the current contents are not carried over;
#                otherwise they are exported and sorted together with
#                the new files
# sort_proc::    called as sort_proc.call(sorted_path, exported_path,
#                *files); it must leave the sorted result in
#                sorted_path
# files::        paths of additional TSV files to import
#
# The new contents are first written to "<filename>.<pid>.tmp~". The
# old file is then renamed to "<filename>.<pid>.bak~" (a missing old
# file is ignored), the new file is renamed into place, and the backup
# is deleted. Returns self.
#
# Both temporary files are closed and removed in an ensure clause, so
# they do not linger when exporting, sorting or rebuilding raises.
def import_tsv_files(flag_primary, mode, sort_proc, *files)
  require 'tempfile'

  tmpfile0 = nil
  tmpfile1 = Tempfile.open('flat')
  begin
    self.export_tsv(tmpfile1) unless mode == :new
    tmpfile1.close(false)

    tmpfile0 = Tempfile.open('sorted')
    tmpfile0.close(false)

    sort_proc.call(tmpfile0.path, tmpfile1.path, *files)

    tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
    tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary)
    tmpmap.close
    self.close

    backup = self.filename + ".#{$$}.bak~"
    begin
      File.rename(self.filename, backup)
    rescue Errno::ENOENT
      # no previous file to keep
    end
    File.rename(tmpmap.filename, self.filename)
    begin
      File.delete(backup)
    rescue Errno::ENOENT
      # nothing was backed up
    end
  ensure
    tmpfile0.close(true) if tmpfile0
    tmpfile1.close(true)
  end
  self
end
# File lib/bio/io/flatfile/index.rb, line 827
# Starts a new, empty mapping file with record size +rs+.
#
# Opens the file, truncates it, and writes the record size as a
# zero-padded decimal header of @@recsize_width characters. The cached
# record size is set to +rs+ and the cached record count to 0.
#
# Raises RuntimeError ('record size out of range') unless
# 0 < rs < 10 ** @@recsize_width; the check happens before the file
# is opened.
def init(rs)
  in_range = (0 < rs && rs < 10 ** @@recsize_width)
  raise 'record size out of range' unless in_range

  open
  @record_size = rs
  header = format("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0, IO::SEEK_SET)
  @file.write(header)
  @records = 0
end
# File lib/bio/io/flatfile/index.rb, line 857
# Re-initializes this mapping file from an already sorted TSV file.
#
# filename::     path of the sorted TSV file
# flag_primary:: when true, consecutive lines with the same key are
#                collapsed to the last one of the run and a debug
#                message is printed for each line that gets dropped;
#                when false, a line is skipped only if its Record
#                equals the Record of the line before it
#
# The file is read twice: once to find the longest line (without its
# line terminator), which becomes the record size (at least 1), and
# once to add the records. Returns self.
#
# The input is opened in block form so it is closed even when
# initializing or adding a record raises.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  File.open(filename) do |f|
    rec_size = 1
    f.each do |y|
      len = y.chomp.length
      rec_size = len if rec_size < len
    end
    init(rec_size)

    prev = nil
    f.rewind
    if flag_primary then
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        if prev then
          if x.key == prev.key
            DEBUG.print "Warning: overwrote unique id #{x.key.inspect}\n"
          else
            add_record(prev.to_s)
          end
        end
        prev = x
      end
      # The last record of the final run is still pending.
      add_record(prev.to_s) if prev
    else
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        add_record(x.to_s) if x != prev
        prev = x
      end
    end
  end
  self
end
# File lib/bio/io/flatfile/index.rb, line 745
# Opens the file with the stored filename and mode unless it is
# already open. Returns true when the file was opened by this call,
# nil when it was open already.
def open
  return nil if @file

  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end
# File lib/bio/io/flatfile/index.rb, line 814
# Writes +str+ as record number +i+.
#
# When +i+ is an existing index the file is positioned on that record
# and it is overwritten. When +i+ is at or past the current record
# count, the gap (if any) is filled with records made of spaces, the
# cached record count becomes i + 1, and the record is written at the
# end of the file. Returns whatever write_record returns.
def put_record(i, str)
  count = records
  size = record_size
  if i < count
    seek(i)
  else
    blank = format("%-*s", size, '')
    @file.seek(0, IO::SEEK_END)
    @file.write(blank * (i - count))
    @records = i + 1
  end
  write_record(str)
end
# File lib/bio/io/flatfile/index.rb, line 764
# Returns the record size, reading it from the file header on first
# use and caching it afterwards.
#
# On first use the file is opened if necessary and the first
# @@recsize_width characters are read from the start of the file.
# Raises RuntimeError ('strange record size') when that header does
# not match @@recsize_regex.
def record_size
  return @record_size if @record_size

  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end
# File lib/bio/io/flatfile/index.rb, line 789
# Returns the number of records, computing it on first use and caching
# it afterwards: the file size minus the @@recsize_width header,
# divided by the record size.
def records
  return @records if @records

  # record_size is looked up first; @file is used only afterwards.
  size = record_size
  payload_bytes = @file.stat.size - @@recsize_width
  @records = payload_bytes / size
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end
methods for searching
# File lib/bio/io/flatfile/index.rb, line 982
# Looks up +key+ by binary search over the records and returns the
# values of all records whose key equals it, in file order. Returns an
# empty array when there are no records or no record matches.
def search(key)
  upper = records
  return [] if upper <= 0

  pos = upper / 2
  DEBUG.print "binary search starts...\n"
  while true
    rec = Record.new(get_record(pos))
    probed = pos
    if key < rec.key
      # Everything from pos upwards is too large.
      upper = pos
      pos = pos / 2
    elsif key > rec.key
      pos = (pos + upper) / 2
    else
      # Hit: collect neighbours with the same key on both sides.
      hits = [ rec.val ]
      below = pos - 1
      while below >= 0 &&
            (rec = Record.new(get_record(below))).key == key
        hits << rec.val
        below -= 1
      end
      # The backward walk gathered values in reverse file order.
      hits.reverse!
      above = pos + 1
      while above < upper &&
            (rec = Record.new(get_record(above))).key == key
        hits << rec.val
        above += 1
      end
      DEBUG.print "#{hits.size} hits found!!\n"
      return hits
    end
    # The probe position no longer moves: the key is not present.
    break if probed == pos
  end
  DEBUG.print "no hits found\n"
  []
end
# File lib/bio/io/flatfile/index.rb, line 784
# Positions the file at the start of record number +i+, that is at
# byte offset @@recsize_width + record_size * i.
def seek(i)
  # record_size is looked up first; @file is used only afterwards.
  size = record_size
  offset = @@recsize_width + size * i
  @file.seek(offset)
end