Welcome to mirror list, hosted at ThFree Co, Russian Federation.

mapscript.sh « cluster - github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: f8173a0732737cc0ad2b90d659eed4f9d329a6d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
out_directory=$1
in_directory=$2
nmappers=$3;
realmappers=$4
hadoop fs -rmr $out_directory > /dev/null 2>&1; 
total=`hadoop fs -ls $in_directory | cut -d " " -f 7 | awk 'BEGIN{sum = 0} {if(NF > 0) sum += $1;} END{print sum;}'`
echo $total
mapsize=`expr $total / $nmappers`
maprem=`expr $total % $nmappers`
mapsize=`expr $mapsize + $maprem`
mapsize=`expr $mapsize + 100`
echo $mapsize
killall allreduce_master
./allreduce_master $realmappers 2
master=`hostname`
mapcommand="runvw.sh $out_directory $master"
echo $mapcommand
hadoop jar $HADOOP_HOME/hadoop-streaming.jar -Dmapred.job.queue.name=search -Dmapred.min.split.size=$mapsize -Dmapred.reduce.tasks=0 -Dmapred.job.map.memory.mb=3000 -Dmapred.child.java.opts="-Xmx100m" -Dmapred.task.timeout=600000000 -input $in_directory -output $out_directory -file ../vw -file runvw.sh -mapper "$mapcommand" -reducer NONE