-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_by_field.pl
73 lines (66 loc) · 1.56 KB
/
split_by_field.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/perl
# usage -- print the help banner on STDERR and abort the script.
#
# Takes no arguments and does not return: it always die()s. The banner ends
# with a newline, so Perl does not append "at <file> line <n>" to it.
#
# The command name on the USAGE line comes from $0 (the name the script was
# invoked under) instead of a hard-coded name, so the banner cannot drift out
# of sync when the file is renamed.
sub usage () {
    # Double-quoted heredoc: "\\n" prints a literal backslash-n and "\@"
    # a literal at-sign; $0 is the only value interpolated.
    die <<"END_USAGE";

#===============================================================================
#
# USAGE: $0 <Input File> <Field number> <Prefix_out> <List_file>
#
# DESCRIPTION: Split sam file according chromosome [for too large to sort]
# Field number start from 1
# If your file for split is too large,it is suggested to give a file containing all list
# File format are: chr1\\nchr2\\nchr3 chr4 chr5 etc
#
# Input_file and field_number is required. Prefix default "x"
#
# AUTHOR: Wang yu , wangyu.big\@gmail.com
# COMPANY: BIG.CAS
# CREATED: 08/28/2009 03:52:35 PM
#===============================================================================
END_USAGE
}
use strict;
use warnings;
# ---------------------------------------------------------------------------
# Main program: split a tab-delimited file into one output file per distinct
# value of one column.
#
# Command line (positional):
#   $ARGV[0]  input file (required)
#   $ARGV[1]  1-based number of the column to split on (required)
#   $ARGV[2]  output prefix; files are named "<prefix>_<value>" (default "x")
#   $ARGV[3]  optional list file holding whitespace-separated column values
#
# With a list file, one output file is opened up front for every listed
# value; lines whose value is not listed are not written anywhere, only the
# value itself is printed to STDOUT. Without a list file, an output file is
# created the first time each value is seen while streaming the input, so no
# shell pipeline or external helper script is involved and odd characters in
# the file name or field number cannot reach a shell.
#
# Every open, write and close is checked; failures abort with a message.
# ---------------------------------------------------------------------------
use FileHandle;

# Open $path for writing (truncating it) and return the handle; dies on error.
sub _open_output {
    my ($path) = @_;
    # The explicit 'w' mode keeps characters in $path from being read as an
    # open mode, unlike the ">$path" single-string form.
    my $fh = FileHandle->new($path, 'w')
        or die "Cannot open '$path' for writing: $!\n";
    return $fh;
}

# Return every whitespace-separated token found in the list file $path.
sub _read_name_list {
    my ($path) = @_;
    my $fh = FileHandle->new($path, 'r')
        or die "Cannot open list file '$path': $!\n";
    my @names;
    while (my $line = <$fh>) {
        # split ' ' ignores leading whitespace and the trailing newline, so
        # indented or blank lines never yield an empty name.
        push @names, split ' ', $line;
    }
    $fh->close;
    return @names;
}

my ($input_file, $field_no, $prefix_arg, $list_file) = @ARGV;

# The input file and a positive, 1-based field number are mandatory.
usage() unless defined $input_file && length $input_file;
usage() unless defined $field_no   && $field_no =~ /\A[1-9][0-9]*\z/;

my $prefix    = (defined $prefix_arg && length $prefix_arg) ? $prefix_arg : 'x';
my $have_list = defined $list_file && length $list_file;
my $column    = $field_no - 1;    # zero-based index into the split fields

my %handle;                       # column value => open output FileHandle

if ($have_list) {
    for my $name (_read_name_list($list_file)) {
        next if $handle{$name};   # a repeated name must not reopen/truncate
        $handle{$name} = _open_output($prefix . '_' . $name);
    }
}

my $in = FileHandle->new($input_file, 'r')
    or die "Cannot open input file '$input_file': $!\n";

while (my $line = <$in>) {
    chomp $line;

    # Limit -1 keeps trailing empty fields, so an empty last column is still
    # a (defined, empty) value rather than a missing one.
    my @fields = split /\t/, $line, -1;

    # A line with too few columns is keyed by the empty string, which is how
    # an undefined value behaves as a hash key anyway, minus the warning.
    my $key = $fields[$column];
    $key = '' unless defined $key;

    my $out = $handle{$key};
    if (!$out && !$have_list) {
        # No list given: create the file the first time a value shows up.
        $out = $handle{$key} = _open_output($prefix . '_' . $key);
    }

    if ($out) {
        print {$out} $line, "\n"
            or die "Write to '${prefix}_$key' failed: $!\n";
    }
    else {
        # Value not in the list: report just the value on STDOUT.
        print $key, "\n";
    }
}
$in->close;

# Buffered write errors only surface at close, so check each one.
for my $key (sort keys %handle) {
    $handle{$key}->close
        or die "Error closing '${prefix}_$key': $!\n";
}