#!/bin/perl
use strict;
use warnings;

###  INSTALL DIRECTIONS:
#
# 1. Install this PERL executable, sge-lam inside the LAM bin dir.
#    Make sure it is executable.
# 2. Modify the following variables: LAMHOME below to fit your site setup.
#
my $LAMHOME = "/temp/LAM";
#
# 3. Create an SGE PE that can be used to submit lam jobs. The following
#    is an example assuming the scripts exist in /usr/local/lam/bin.
#    You should replace the queue_list and slots with your site specific
#    values or set it to "all" to use all the queues.
#
#    % qconf -sp lammpi
#      pe_name           lammpi
#      queue_list        all
#      slots             6
#      user_lists        NONE
#      xuser_lists       NONE
#      start_proc_args   /usr/local/lam/bin/sge-lam start
#      stop_proc_args    /usr/local/lam/bin/sge-lam stop
#      allocation_rule   2
#      control_slaves    TRUE
#      job_is_first_task TRUE
#
#    NOTE: It is probably easiest to use the qmon GUI to create the PE.
#
# 4. Add a new LAM node process schema into the $LAMHOME/etc area
#    named sge-lam-conf.lamd. This should be a single line that
#    adds the "sge-lam qrsh-local" prefix to the lamd startup.
#
#    % cat /usr/local/lam/etc/sge-lam-conf.lamd
#    /usr/local/lam/bin/sge-lam qrsh-local /usr/local/lam/bin/lamd
#       $inet_topo $debug $session_prefix $session_suffix
#
#### Submitting SGE JOBS
#
# Once this is setup users can submit jobs as normal and should not need to
# lamboot on their own. Users need only call mpirun for their MPI programs.
# Here is an example job:
#
#   % cat lamjob.csh
#   #$ -cwd
#   set path=(/usr/local/lam/bin $path)
#   echo "Starting my LAM MPI job"
#   mpirun C conn-60
#   echo "LAM MPI job done"
#
#
#### Comments/Issues email: christopher.duncan@xxxxxxx
#
# END INSTALL

# $verbose adds -v to the lamboot/lamhalt command lines.
# $debug adds -d to them and also turns on the /tmp/sgedebug.* trace file.
my $verbose = 1;
my $debug   = 0;

# close STDIN to avoid stdio race conditions and tty issues
close(STDIN);

# Handle for the debug trace file; stays undef unless $debug is set and the
# file could be opened.
my $debug_fh;
if ($debug) {
    my $job_id     = defined $ENV{JOB_ID} ? $ENV{JOB_ID} : '';
    my $debug_file = "/tmp/sgedebug.$job_id.$$";
    if (open($debug_fh, '>', $debug_file)) {
        unbuffer($debug_fh);
    }
    else {
        warn "sge-lam: cannot open debug file $debug_file: $!\n";
        undef $debug_fh;
    }
    # Send STDERR (ours and that of the exec'ed program) to the same file.
    open(STDERR, '>>', $debug_file)
        or warn "sge-lam: cannot redirect STDERR to $debug_file: $!\n";
}

# set output for stderr and stdout to be unbuffered
unbuffer(\*STDERR);
unbuffer(\*STDOUT);

my $lamboot    = "$LAMHOME/bin/lamboot";
my $lamhalt    = "$LAMHOME/bin/lamhalt";
my $sgelamconf = "sge-lam-conf.lamd";

# read in the args to figure out our task
my $func = shift @ARGV;
$func = '' unless defined $func;

my $SGE_ROOT = defined $ENV{SGE_ROOT} ? $ENV{SGE_ROOT} : '';
my $arch     = `${SGE_ROOT}/util/arch`;
$arch = '' unless defined $arch;
chomp($arch);
my $qrsh = "${SGE_ROOT}/bin/${arch}/qrsh";

# add LAM and SGE to path
$ENV{'PATH'} .= ":${SGE_ROOT}/bin/${arch}";
$ENV{'PATH'} .= ":${LAMHOME}/bin";

debug_print("LAMHOME = $LAMHOME");
debug_print("SGE_ROOT = $SGE_ROOT");
debug_print("PATH = $ENV{PATH}");
debug_print("qrsh = $qrsh");
debug_print("ARGV = \"" . join("\" \"", @ARGV) . "\"");

if ($func eq "start") {
    debug_print("func=start");
    print "Starting SGE + LAM Integration\n";
    print "\t using tight integration scheme\n";
    start_proc_args();
}
elsif ($func eq "stop") {
    debug_print("func=stop");
    print "Stoping SGE + LAM Integration\n";
    stop_proc_args();
}
elsif ($func eq "qrsh-remote") {
    debug_print("func=qrsh-remote");
    qrsh_remote();
}
elsif ($func eq "qrsh-local") {
    debug_print("func=qrsh-local");
    qrsh_local();
}
else {
    print STDERR "\nusage: $0 {start|stop}\n\n";
    exit(-1);
}

# start_proc_args()
#
# Handler for "sge-lam start". Converts the host list named by
# $ENV{PE_HOSTFILE} into a LAM boot schema ("<host> cpu=<n>" per line),
# writes it to $ENV{TMPDIR}/lamhostfile and then replaces this process with
# lamboot. Dies (non-zero exit) if the environment is incomplete, a file
# cannot be read/written, or lamboot cannot be exec'ed. Does not return.
sub start_proc_args {
    # we currently place the LAM host file in the TMPDIR that SGE uses.
    # if we place it elsewhere we need to clean it up
    my $tmpdir = $ENV{TMPDIR};
    die "sge-lam: TMPDIR is not set\n"
        unless defined $tmpdir && length $tmpdir;
    my $lamhostsfile = "$tmpdir/lamhostfile";

    # flags and options for lamboot (-x, -s and -np may be useful in some envs)
    my @lambootargs = (
        "-nn",
        "-ssi", "boot",           "rsh",
        "-ssi", "boot_rsh_agent", "${LAMHOME}/bin/sge-lam qrsh-remote",
        "-c",   $sgelamconf,
    );
    push(@lambootargs, "-v") if $verbose;
    push(@lambootargs, "-d") if $debug;
    push(@lambootargs, $lamhostsfile);
    debug_print("LAMBOOT ARGS: @lambootargs");

    ### Need to convert the SGE hostfile to a LAM hostfile format
    my $pe_hostfile = $ENV{PE_HOSTFILE};
    die "sge-lam: PE_HOSTFILE is not set\n"
        unless defined $pe_hostfile && length $pe_hostfile;

    # open and read the PE hostfile
    open(my $sge_fh, '<', $pe_hostfile)
        or die "sge-lam: cannot read PE hostfile $pe_hostfile: $!\n";

    # convert to LAM bhost file format: only the first two whitespace
    # separated fields of each line are used, any further fields are ignored
    my @lamhostslist;
    while (my $line = <$sge_fh>) {
        my ($host, $ncpu) = split(' ', $line);
        next unless defined $host && defined $ncpu;    # blank/short line
        push(@lamhostslist, "$host cpu=$ncpu");
    }
    close($sge_fh);

    # create the new lam bhost file
    open(my $lam_fh, '>', $lamhostsfile)
        or die "sge-lam: cannot write LAM hostfile $lamhostsfile: $!\n";
    print {$lam_fh} join("\n", @lamhostslist), "\n";
    # buffered write errors only surface at close
    close($lam_fh)
        or die "sge-lam: error writing LAM hostfile $lamhostsfile: $!\n";

    exec_or_die($lamboot, @lambootargs);
}

# stop_proc_args()
#
# Handler for "sge-lam stop". Replaces this process with lamhalt, passing
# -v / -d according to $verbose / $debug. Does not return.
sub stop_proc_args {
    my @lamhaltargs;
    push(@lamhaltargs, "-v") if $verbose;
    push(@lamhaltargs, "-d") if $debug;

    exec_or_die($lamhalt, @lamhaltargs);
}

# qrsh_remote()
#
# Handler for "sge-lam qrsh-remote" (configured above as lamboot's
# boot_rsh_agent). Runs "qrsh -inherit -nostdin -V" followed by the remaining
# command-line arguments, with the first "-n" argument removed. Does not
# return.
sub qrsh_remote {
    my @myargs = ("-inherit", "-nostdin", "-V", @ARGV);

    # Drop the first "-n". As in the original loop, the final argument is
    # never examined, so a trailing "-n" is passed through untouched.
    for my $i (0 .. $#myargs - 1) {
        if ($myargs[$i] =~ /^-n$/) {
            splice(@myargs, $i, 1);
            last;
        }
    }
    debug_print("QRSH REMOTE CONFIG: @myargs");

    exec_or_die($qrsh, @myargs);
}

# qrsh_local()
#
# Handler for "sge-lam qrsh-local" (the prefix used in the
# sge-lam-conf.lamd schema). Does not return.
sub qrsh_local {
    # we are running a local qrsh to add the lamd into the current job
    # on the local node using the LAM boot schema

    # get the hostname to pass to qrsh
    my $hostname = `/bin/hostname`;
    $hostname = '' unless defined $hostname;
    chomp($hostname);

    # tell SGE to add this command into the JOB_ID job by using qrsh -inherit
    # the hostname is not passed in this case in ARGV by lamboot
    my @myargs = ("-inherit", "-nostdin", "-V", $hostname, @ARGV);
    debug_print("QRSH LOCAL CONFIG: @myargs");

    exec_or_die($qrsh, @myargs);
}

# debug_print(@text)
#
# Writes "SGE-LAM DEBUG: <text>" to the debug trace file. A no-op unless
# $debug is set and the trace file is open.
sub debug_print {
    my @text = @_;
    if ($debug && defined $debug_fh) {
        print {$debug_fh} "SGE-LAM DEBUG: @text\n";
    }
    return;
}

# unbuffer($handle)
#
# Turns on autoflush for $handle and restores the previously selected
# default output handle afterwards.
sub unbuffer {
    my ($handle) = @_;
    my $previous = select($handle);
    $| = 1;
    select($previous);
    return;
}

# exec_or_die($program, @args)
#
# Closes the debug trace file (if open) and replaces this process with
# $program. The indirect-object form of exec is used so that no shell is
# involved even when @args is empty. Dies if the exec fails, so the failure
# shows up as a non-zero exit status instead of a silent success.
sub exec_or_die {
    my ($program, @args) = @_;
    if (defined $debug_fh) {
        close($debug_fh);
        undef $debug_fh;
    }
    exec { $program } $program, @args
        or die "sge-lam: cannot exec $program: $!\n";
}