| Module | Bio::FlatFileIndex::Indexer |
| In: |
lib/bio/io/flatfile/indexer.rb
|
| DEFAULT_SORT | = | '/usr/bin/sort' | default sort program | |
| DEFAULT_ENV | = | '/usr/bin/env' | default env program (run a program in a modified environment) | |
| DEFAULT_ENV_ARGS | = | [ 'LC_ALL=C' ] | default arguments for env program |
# File lib/bio/io/flatfile/indexer.rb, line 476
# Reads the flat files listed in +need_update+ and adds their entries to
# the Berkeley DB index files of +db+.
#
# db::          databank; its +primary+, +secondary+ and +fileids+ are used.
# flag::        value assigned to the +flag+ attribute of the primary index
#               file and of every secondary index file before entries are
#               added.
# need_update:: enumerable of file IDs; each one is looked up in
#               +db.fileids+ to obtain the file name to parse.
# parser::      object that opens a flat file, yields (position, length)
#               for each entry, and extracts primary/secondary keys.
# options::     accepted for signature parity; not referenced here.
#
# Returns true.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  primary_ns = db.primary
  primary_ns.file.close
  primary_ns.file.flag = flag

  # Every secondary index file is closed, given the new flag, and then
  # opened and closed once more before any entry is added.
  db.secondary.each_files do |secondary_ns|
    secondary_ns.file.close
    secondary_ns.file.flag = flag
    secondary_ns.file.open
    secondary_ns.file.close
  end

  need_update.each do |fileid|
    parser.open_flatfile(fileid, db.fileids[fileid].filename)
    parser.each do |offset, length|
      primary_key = parser.parse_primary
      # add_overwrite is used for the primary key
      # (an add_exclusive variant was disabled in the past).
      primary_ns.file.add_overwrite(primary_key, [ fileid, offset, length ])
      parser.parse_secondary do |ns_name, secondary_key|
        db.secondary[ns_name].file.add_nr(secondary_key, primary_key)
      end
    end
    parser.close_flatfile
  end

  true
end
# File lib/bio/io/flatfile/indexer.rb, line 525
# Reads the flat files listed in +need_update+, writes their keys to
# temporary tab-separated files, and imports those files into the flat
# index files of +db+ using a sort procedure.
#
# db::          databank; its +primary+, +secondary+ and +fileids+ are used.
# mode::        passed on to chose_sort_proc and import_tsv_files
#               (callers in this file pass :new or :add).
# need_update:: enumerable of file IDs; each one is looked up in
#               +db.fileids+ to obtain the file name to parse.
# parser::      object that opens a flat file, yields (position, length)
#               for each entry, and extracts primary/secondary keys.
# options::     hash; the keys 'sort_program', 'env_program' and
#               'env_program_arguments' are read and handed to
#               chose_sort_proc.
#
# Returns false without doing anything when +need_update+ is empty,
# true otherwise.
#
# All temporary files are closed and removed even when parsing or
# sorting raises an exception.
def self.addindex_flat(db, mode, need_update, parser, options)
  require 'tempfile'
  prog     = options['sort_program']
  env      = options['env_program']
  env_args = options['env_program_arguments']

  return false if need_update.to_a.empty?

  pfile  = nil
  sfiles = {}
  begin
    DEBUG.print "prepare temporary files...\n"
    tempbase = "bioflat#{rand(10000)}-"
    pfile = Tempfile.open(tempbase + 'primary-')
    DEBUG.print "open temporary file #{pfile.path.inspect}\n"
    parser.secondary.names.each do |x|
      sfiles[x] = Tempfile.open(tempbase + 'secondary-')
      DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
    end

    DEBUG.print "reading files...\n"
    need_update.each do |fileid|
      filename = db.fileids[fileid].filename
      parser.open_flatfile(fileid, filename)
      parser.each do |pos, len|
        key = parser.parse_primary
        # primary line: key, file ID, position, length
        pfile << "#{key}\t#{fileid}\t#{pos}\t#{len}\n"
        parser.parse_secondary do |sn, sp|
          # secondary line: secondary key, primary key
          sfiles[sn] << "#{sp}\t#{key}\n"
        end
      end
      parser.close_flatfile
    end

    sort_proc = chose_sort_proc(prog, mode, env, env_args)

    # Close without unlinking so that buffered data is flushed and the
    # path stays valid for the import step.
    pfile.close(false)
    DEBUG.print "sorting primary (#{parser.primary.name})...\n"
    db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
    # The primary temporary file is no longer needed; remove it now.
    pfile.close(true)

    parser.secondary.names.each do |x|
      DEBUG.print "sorting secondary (#{x})...\n"
      sfiles[x].close(false)
      db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                            sfiles[x].path)
      sfiles[x].close(true)
    end
    true
  ensure
    # Safety net: remove whatever temporary files still exist, e.g. after
    # an exception. Closing an already removed Tempfile again is harmless.
    [ pfile, *sfiles.values ].compact.each { |tmp| tmp.close(true) }
  end
end
# File lib/bio/io/flatfile/indexer.rb, line 585
# Picks the sort procedure used when importing TSV files into a flat index.
#
# prog::     name/path of the sort program. 'builtin', 'hs' or 'lm'
#            (case-insensitive) select the internal sort routine. nil or ''
#            select DEFAULT_SORT when it is executable, and the internal
#            routine otherwise. Anything else is used as an external
#            program.
# mode::     :new selects external_sort_proc; any other value selects
#            external_merge_sort_proc (external programs only).
# env::      '' or false: run +prog+ directly; another true value: use it
#            as the env program; nil: use DEFAULT_ENV when executable.
# env_args:: arguments placed between the env program and +prog+;
#            defaults to DEFAULT_ENV_ARGS.
#
# Returns the object produced by the selected Flat_1::FlatMappingFile
# factory method.
def self.chose_sort_proc(prog, mode = :new,
                         env = nil, env_args = nil)
  use_internal = false
  case prog
  when /^builtin$/i, /^hs$/i, /^lm$/i
    use_internal = true
  when nil, ''
    if FileTest.executable?(DEFAULT_SORT)
      return chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
    end
    use_internal = true
  end

  if use_internal
    DEBUG.print "sort: internal sort routine\n"
    return Flat_1::FlatMappingFile::internal_sort_proc
  end

  env_args ||= DEFAULT_ENV_ARGS
  env_program =
    if env == '' || env == false
      nil                     # use of an env program is inhibited
    elsif env
      env                     # caller-supplied env program
    elsif FileTest.executable?(DEFAULT_ENV)
      DEFAULT_ENV             # env == nil: default env program if possible
    end
  prefixes = env_program ? [ env_program ] + env_args + [ prog ] : [ prog ]

  DEBUG.print "sort: #{prefixes.join(' ')}\n"
  if mode == :new
    Flat_1::FlatMappingFile::external_sort_proc(prefixes)
  else
    Flat_1::FlatMappingFile::external_merge_sort_proc(prefixes)
  end
end
# File lib/bio/io/flatfile/indexer.rb, line 451
# Creates a new Berkeley DB based databank called +name+ and indexes
# +files+ into it.
#
# name::    passed to DataBank.new together with MAGIC_BDB.
# parser::  supplies the format, the primary namespace name and the
#           secondary namespace names, and is used to read the files.
# options:: not used here except for being forwarded to addindex_bdb.
# files::   flat files to register and index.
#
# Raises RuntimeError when the BDB constant is not defined.
# Returns true.
def self.makeindexBDB(name, parser, options, *files)
  raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

  DEBUG.print "makeing BDB DataBank...\n"
  bank = DataBank.new(name, MAGIC_BDB)
  bank.format = parser.format
  bank.fileids.add(*files)
  bank.fileids.recalc

  bank.primary = parser.primary.name
  bank.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  bank.write('wb', BDBdefault::flag_write)

  DEBUG.print "reading files...\n"

  # Every registered file (IDs 0 ... files.size) needs to be indexed.
  addindex_bdb(bank, BDBdefault::flag_write, (0...(files.size)),
               parser, options)
  bank.close
  true
end
# File lib/bio/io/flatfile/indexer.rb, line 508
# Creates a new flat-file databank called +name+ and indexes +files+
# into it via addindex_flat in :new mode.
#
# name::    passed to DataBank.new (with nil as the second argument).
# parser::  supplies the format, the primary namespace name and the
#           secondary namespace names, and is used to read the files.
# options:: forwarded to addindex_flat.
# files::   flat files to register and index.
#
# Returns true.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  bank = DataBank.new(name, nil)
  bank.format = parser.format
  bank.fileids.add(*files)
  bank.primary = parser.primary.name
  bank.secondary = parser.secondary.names
  bank.fileids.recalc
  DEBUG.print "writing DabaBank...\n"
  bank.write('wb')

  # Every registered file (IDs 0 ... files.size) needs to be indexed.
  addindex_flat(bank, :new, (0...(files.size)), parser, options)
  bank.close
  true
end
# File lib/bio/io/flatfile/indexer.rb, line 621
# Updates the existing databank +name+: re-indexes registered files whose
# +check+ fails and indexes any of +files+ that are not registered yet.
#
# name::    databank name, opened with DataBank.open.
# parser::  parser to use, or nil/false to let this method build one from
#           the databank's stored format and auto-detected file classes.
# options:: hash; 'renew' (true value) rebuilds the whole index from the
#           still-existing registered files plus +files+. The hash is
#           also forwarded to the makeindex*/addindex* methods.
# files::   flat files to add.
#
# Raises RuntimeError ('file format mismatch', 'unsupported format',
# 'cannnot determine format. please specify manually.' or
# 'Unsupported index type') when the situation cannot be handled.
# Returns true.
def self.update_index(name, parser, options, *files)
  db = DataBank.open(name)

  if parser
    raise 'file format mismatch' if db.format != parser.format
  else
    # Auto-detect the classes of the first registered file and of the
    # first new file; a missing or unusable file yields nil.
    dbclass_orig =
      begin
        Bio::FlatFile.autodetect_file(db.fileids[0].filename)
      rescue TypeError, Errno::ENOENT
        nil
      end
    dbclass_new =
      begin
        Bio::FlatFile.autodetect_file(files[0])
      rescue TypeError, Errno::ENOENT
        nil
      end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
      if dbclass_new && dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    when 'genbank'
      # Use `||`, not `or`: with `or` the assignment binds first and the
      # fallback to dbclass_new is silently discarded.
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank || dbclass == Bio::GenPept
        # Build the parser from the resolved class, which may come from
        # the new file when the original one could not be inspected.
        parser = Parser.new(dbclass)
      elsif !dbclass
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
      if dbclass_new && dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew']
    # Rebuild from scratch: registered files that still exist followed by
    # the given files, without duplicates, in first-seen order.
    newfiles = db.fileids.filenames.find_all do |x|
      FileTest.exist?(x)
    end
    newfiles.concat(files)
    newfiles = newfiles.uniq
    t = db.index_type
    db.close
    case t
    when MAGIC_BDB
      Indexer::makeindexBDB(name, parser, options, *newfiles)
    when MAGIC_FLAT
      Indexer::makeindexFlat(name, parser, options, *newfiles)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Collect IDs of registered files whose check fails, and drop already
  # registered names from the list of files to add.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    # A registered file has vanished: fall back to a full rebuild.
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end
  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # Newly added files receive the IDs b ... b + newfiles.size.
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault::flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault::flag_append,
                 need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end