#!/bin/bash
#
#
##H============================================================================
##H                      SAS SPDE SerDe for Hive
##H============================================================================
##HSyntax:
##H
##H sh sashiveserdespde-installjar.sh -hive -mr
##H
##HWhere:
##H -hive | -hiveinstalldir  : Specify Hive installation directory
##H
##H -mr | -mrinstalldir      : Specify Map-Reduce install directory
##H
##H -jl | -jarloc            : Location of the SAS Hive SPDE Serde jars.  If
##H                none is specified, will use the current working
##H                directory.
##H
##H -hdfsuser                : Specifies the user name with write access 
##H                to the HDFS root folder.
##H
##H -host                    : A host or a list of hosts, separated by spaces.
##H
##H -yarnrm                  : Specifies the YARN Resource Manager web  
##H                application host name and port number in the format 
##H                <resourcemanagerhost:port>. This option may be used when the 
##H                yarn-site.xml property file is not present in the Hadoop 
##H                configuration folder, or the folder where the yarn-site.xml 
##H                resides is not in the path.  If not specified, the install 
##H                script will look for the YARN Resource Manager web application 
##H                host name and port number in the yarn-site.xml.
##H
##H -u | -uninstall          : Remove the SAS Hive SPDE Serde jars from the
##H                specified Hive and Map-Reduce directories.
##H
##H -h | -help               : Help to list the options and usage description
##H============================================================================
##XEND
# Path this script was invoked with; printSyntax re-reads it for the ##H help text.
THIS_PROGRAM="$0"
# Java class run through the hadoop launcher for the cluster queries below.
ADMIN_UTIL_CLASSNAME="com.sas.hadoop.serde.spde.hive.utilities.AdminUtil"

# Called with no (or an empty) first argument: show the short syntax
# summary and stop with the status of the print command.
if [ -z "$1" ]; then
  printf '%s\n' \
    "***************************************************************************" \
    "Syntax: installSerdeJar.sh                                                 " \
    "        -hive | -hiveinstalldir <Hadoop Hive install directory>            " \
    "        -mr | -mrinstalldir <Hadoop Map-Reduce install directory>          " \
    "        [-jl | -jarloc <SAS Hive SPDE SerDe jar location>]                 " \
    "        [-hdfsuser <hdfs user ID>]                                         " \
    "        [-host <host list>]                                                " \
    "        [-yarnrm <resourcemanagerhost:port>]                               " \
    "        [-h | -help Complete usage description]                            " \
    "        [-u | -uninstall Remove the SerDe jars ]                           " \
    "***************************************************************************"
  exit
fi

#======================================================
# Print the arguments as one line with leading and
# trailing whitespace removed.
#
# Arguments: any number of words; they are joined with
#            the first character of IFS (a space by
#            default) before trimming.
# Outputs:   the trimmed text followed by a newline.
#
# printf is used instead of echo so that a value such
# as "-n" or "-e" is printed literally instead of being
# consumed as an echo option.
#======================================================
trimSpaces()
{
 local joined="$*"

 # Strip the longest all-whitespace prefix, then the longest
 # all-whitespace suffix; interior spacing is left alone.
 joined="${joined#"${joined%%[![:space:]]*}"}"
 joined="${joined%"${joined##*[![:space:]]}"}"

 printf '%s\n' "$joined"
}

#======================================================
# Print the long help text embedded in a script file.
#
# Every line that starts with "##H" is printed with that
# three-character marker removed.  Reading stops at the
# first line that starts with "##X".
#
# Arguments: $1 - path of the file to scan (normally this
#                 script itself).
# Outputs:   the help text on stdout.
# Exits:     always terminates the shell with status 255
#            (the value the historical "exit -1" produced),
#            so it never returns to the caller.
#======================================================
printSyntax()
{
 local script_file=${1}
 local record

 if [ ! -r "$script_file" ]; then
   echo "ERROR: Unable to read ${script_file} to print the usage text." >&2
   exit 255
 fi

 # -r keeps backslashes literal; the path is quoted so that a script
 # stored under a directory containing spaces can still be read.
 while read -r record; do
   case "$record" in
     "##H"*) printf '%s\n' "${record:3}" ;;
     "##X"*) exit 255 ;;
   esac
 done < "$script_file"

 exit 255
}

# Defaults; -hdfsuser may replace HDFS_USER, the jar names are fixed.
HDFS_USER=hdfs
JAR_NAME=sas.HiveSerdeSPDE.jar
NLS_JAR_NAME=sas.HiveSerdeSPDE.nls.jar

#======================================================
# Consume the command line one option at a time.
#
# Options that take a value read it from $2 and shift
# both words away before checking that the value is not
# empty.  "$2" is deliberately passed UNQUOTED to
# trimSpaces: the shell's word splitting is what strips
# the surrounding blanks from the value.
#
# The loop ends at the first empty argument.
#======================================================
until [ -z "$1" ]; do
  case "$1" in
    -jl | -jarloc)
      SERDE_JAR_LOC=$(trimSpaces $2)
      shift 2
      [ -n "$SERDE_JAR_LOC" ] || {
        echo "ERROR: A value for option -jarloc was not specified."
        exit 1
      }
      ;;
    -hdfsuser)
      # Taken verbatim; this is the only value that is not trimmed.
      HDFS_USER=$2
      shift 2
      [ -n "$HDFS_USER" ] || {
        echo "ERROR: A value for option -hdfsuser was not specified."
        exit 1
      }
      ;;
    -hive | -hiveinstalldir)
      SERDE_HIVE_DIR=$(trimSpaces $2)
      shift 2
      [ -n "$SERDE_HIVE_DIR" ] || {
        echo "ERROR: A value for option -hiveinstalldir was not specified."
        exit 1
      }
      ;;
    -mr | -mrinstalldir)
      SERDE_MR_DIR=$(trimSpaces $2)
      shift 2
      [ -n "$SERDE_MR_DIR" ] || {
        echo "ERROR: A value for option -mrinstalldir was not specified."
        exit 1
      }
      ;;
    -host)
      SERDE_HOSTLIST=$(trimSpaces $2)
      shift 2
      [ -n "$SERDE_HOSTLIST" ] || {
        echo "ERROR: A value for option -host was not specified."
        exit 1
      }
      ;;
    -yarnrm)
      YARN_RESOURCE_MANAGER=$(trimSpaces $2)
      shift 2
      [ -n "$YARN_RESOURCE_MANAGER" ] || {
        echo "ERROR: A value for option -yarnrm was not specified."
        exit 1
      }
      ;;
    -u | -uninstall)
      # Flag only; any non-empty value switches the script to uninstall mode.
      SERDE_UNINSTALL="T"
      shift 1
      ;;
    -h | -help)
      # printSyntax terminates the script, so nothing is shifted here.
      printSyntax "${THIS_PROGRAM}"
      ;;
    *)
      echo ""
      echo "ERROR: \"${1}\" is an invalid option."
      exit 1
      ;;
  esac
done

# When -jarloc was not given, the jars are looked up in the current
# working directory.
if [ -z "$SERDE_JAR_LOC" ]; then
  SERDE_JAR_LOC=$(pwd)
fi

if [ -z "$SERDE_HIVE_DIR" ] || [ -z "$SERDE_MR_DIR" ]; then
  echo "ERROR: Both Hadoop Hive and Map-Reduce installation directories are required to be specified."
  exit 1
fi

# Option strings for the remote copies/removals; they are expanded
# unquoted later so that each option becomes its own word.
SSH_OPTIONS="-x -q -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o UserKnownHostsFile=/dev/null"
SCP_OPTIONS="-q -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o UserKnownHostsFile=/dev/null"

# From here on SERDE_JAR_LOC / SERDE_NLS_JAR_LOC are full paths of the
# source jars (the NLS path must be derived first, while SERDE_JAR_LOC
# still holds the directory).  The *_HIVE and *_MR variables are the
# installed copies.
SERDE_NLS_JAR_LOC="$SERDE_JAR_LOC/$NLS_JAR_NAME"
SERDE_JAR_LOC="$SERDE_JAR_LOC/$JAR_NAME"
SERDE_JAR_HIVE="$SERDE_HIVE_DIR/$JAR_NAME"
SERDE_NLS_JAR_HIVE="$SERDE_HIVE_DIR/$NLS_JAR_NAME"
SERDE_JAR_MR="$SERDE_MR_DIR/$JAR_NAME"
SERDE_NLS_JAR_MR="$SERDE_MR_DIR/$NLS_JAR_NAME"

# On uninstall the source jar may no longer be available; fall back to
# the copy already installed in the Hive directory.
if [ ! -e "$SERDE_JAR_LOC" ] && [ -n "$SERDE_UNINSTALL" ]; then
  SERDE_JAR_LOC=$SERDE_JAR_HIVE
fi

# The paths are quoted so that a location containing a space is really
# tested; unquoted, the test itself fails and this guard is skipped.
if [ ! -e "$SERDE_JAR_LOC" ]; then
  echo "ERROR: ${SERDE_JAR_LOC} not found."
  exit 1
fi

# Put the SerDe jar first on the classpath used by the hadoop launcher.
export HADOOP_USER_CLASSPATH_FIRST=yes
export HADOOP_CLASSPATH="${SERDE_JAR_LOC}"

#======================================================
# Run the AdminUtil class with -isMapR through the
# hadoop launcher.
#
# Globals:  ADMIN_UTIL_CLASSNAME (read)
# Outputs:  whatever the class prints; the caller
#           compares it numerically with 1 and 0.
# Returns:  the exit status of the hadoop command.
#======================================================
isMapR()
{
  hadoop "$ADMIN_UTIL_CLASSNAME" -isMapR
}

#======================================================
# Report whether a user name is known to the system.
#
# Arguments: $1 - user name to look up with id(1).
# Outputs:   "y" when id succeeds for that name, "n"
#            when the name is empty or id fails.  Any
#            message id writes to stderr is passed
#            through unchanged.
# Returns:   0 in every case; the answer is the output.
#======================================================
isUserExist2()
{
  local user_name=$1

  if [ -z "$user_name" ]; then
    printf 'n\n'
    return
  fi

  # Quoted so the value is looked up as ONE name; unquoted, a value
  # containing a space would be checked as several separate users.
  if id "$user_name" > /dev/null; then
    printf 'y\n'
  else
    printf 'n\n'
  fi
}

#======================================================
# Print the list of data nodes.
#
# Globals:  IS_MAPR, HDFS_USER, SERDE_JAR_LOC,
#           ADMIN_UTIL_CLASSNAME (all read)
# Outputs:  MapR (IS_MAPR is 1): the first column of
#           "maprcli node list -columns hostname" as one
#           space-separated line, skipping rows whose
#           first field is "hostname" or whose second
#           field is "ip".  Nothing is printed when
#           maprcli is not on the PATH.
#           Otherwise: the output of AdminUtil
#           -listdatanodes, run as HDFS_USER via sudo su.
#======================================================
getNodesList()
{
  local cli

  # An unset/empty IS_MAPR is treated as "not MapR".
  if [ "${IS_MAPR:-0}" -eq 1 ]; then
    # command -v is the portable lookup; the resolved name is invoked
    # directly (quoted) rather than being re-parsed through eval.
    cli=$(command -v maprcli 2> /dev/null)
    if [ -n "$cli" ]; then
      "$cli" node list -columns hostname \
        | awk '{if ( $1 != "hostname" && $2 != "ip" ) hostList=hostList $1 " "} END {print hostList}'
    fi
  else
    sudo su "${HDFS_USER}" -c "hadoop jar ${SERDE_JAR_LOC} ${ADMIN_UTIL_CLASSNAME} -listdatanodes"
  fi
}


#==========================================================
# Print the MapR nodes that run a YARN resourcemanager or
# nodemanager service, as one space-separated line.
#
# Globals:  RUNASHDFSUSER (read) - command prefix that runs
#           its single string argument as the HDFS user.
# Outputs:  host list on stdout; diagnostics on stderr so
#           they are not captured as host names by callers
#           that use command substitution.
# Returns:  1 when maprcli is not on the PATH, otherwise
#           the exit status of the "maprcli node list"
#           command itself (not of the filters after it).
#==========================================================
getMapRYarnNodesList()
{
  local cli major host_column rc

  cli=$(command -v maprcli 2> /dev/null)
  if [ -z "$cli" ]; then
    echo "MapR command line interface maprcli not found in the path." >&2
    return 1
  fi

  #----------------------------------------------------------
  # MapR major version number.  $RUNASHDFSUSER is expanded
  # unquoted on purpose: it holds a command plus its options.
  #----------------------------------------------------------
  major=$($RUNASHDFSUSER "$cli config load -keys mapr.targetversion" \
    | awk '{ if ($1 != "mapr.targetversion") print $1;}' \
    | awk -F. '{print $1}')

  # Releases before 6 are read from the second column of the node
  # list, later ones from the first.  A version that cannot be read
  # as a number falls through to the first column.
  host_column=1
  case "$major" in
    '' | *[!0-9]*) ;;
    *)
      if [ "$major" -lt 6 ]; then
        host_column=2
      fi
      ;;
  esac

  $RUNASHDFSUSER "$cli node list -columns service" \
    | grep -E "resourcemanager|nodemanager" \
    | awk -v col="$host_column" '{hostList=hostList $col " "} END {print hostList}'
  # $? would only report awk; take the status of the maprcli stage.
  rc=${PIPESTATUS[0]}
  return "$rc"
}
#==========================================================
# Print the YARN node manager hosts.
#
# When no resource manager was supplied and the cluster is
# MapR, the list comes from getMapRYarnNodesList.  In every
# other case AdminUtil -listyarnnodes is run through hadoop,
# with YARN_RESOURCE_MANAGER appended when it is set.
#
# Globals:  YARN_RESOURCE_MANAGER, IS_MAPR, SERDE_JAR_LOC,
#           ADMIN_UTIL_CLASSNAME (read); rc (written)
# Returns:  the exit status of whichever command produced
#           the list.
#==========================================================
getYarnNodesList()
{
  if [ -z "$YARN_RESOURCE_MANAGER" ]; then
    if [ $IS_MAPR -eq 1 ]; then
      getMapRYarnNodesList
      rc=$?
      return $rc
    fi
  fi

  # YARN_RESOURCE_MANAGER stays unquoted so that an empty value adds no
  # argument at all.  stderr is folded into stdout here.
  # NOTE(review): callers capture this output as the host list, so any
  # diagnostics hadoop writes to stderr end up in that list - confirm
  # this is intended.
  hadoop jar ${SERDE_JAR_LOC} ${ADMIN_UTIL_CLASSNAME} -listyarnnodes $YARN_RESOURCE_MANAGER 2>&1
  rc=$?

  return $rc
}

#======================================================
# Make sure the local host is part of SERDE_HOSTLIST.
#
# Hosts are compared by short name (everything before
# the first dot), so "node1" and "node1.example.com"
# count as the same machine.
#
# Globals:  SERDE_HOSTLIST (read; the local host name is
#           appended when it is missing)
#           LOCALHOST (written with the output of
#           hostname; the install/uninstall loops read
#           it afterwards)
#======================================================
checkNodesList()
{
 local node
 # Start from "not found" on every call; a value left over from an
 # earlier call or inherited from the environment must not count.
 local found=0

 LOCALHOST=$(hostname)
 for node in $SERDE_HOSTLIST; do
  if [ "${node%%.*}" == "${LOCALHOST%%.*}" ]; then
   found=1
   break
  fi
 done

 if [ "$found" -eq 0 ]; then
  SERDE_HOSTLIST="$SERDE_HOSTLIST $LOCALHOST"
 fi
}

# Ask the cluster whether it is MapR.  The answer is compared as a
# number: 1 switches the HDFS user to "mapr", 0 validates the
# configured HDFS user.  IS_MAPR is left unquoted exactly as elsewhere
# in this script; any other value fails both tests and skips both
# branches.
IS_MAPR=$(isMapR)
if [ $IS_MAPR -eq 1 ]; then
  HDFS_USER=mapr
elif [ $IS_MAPR -eq 0 ]; then
  # Make sure the specified hdfs user exists
  isValidUser=$(isUserExist2 $HDFS_USER)
  if [ "${isValidUser}" != "y" ]; then
    echo "ERROR: User [${HDFS_USER}] does not exist. Use -hdfsuser <userName> option to specify the Hadoop FS user name."
    exit 1
  fi
fi

# Command prefix used to run one command string as the HDFS user.
RUNASHDFSUSER="sudo -n -u $HDFS_USER /bin/bash -c "

# Only query the cluster when -host did not already supply the list.
if [ -z "$SERDE_HOSTLIST" ]; then
  if ! SERDE_HOSTLIST=$(getYarnNodesList); then
    SERDE_HOSTLIST=""
    echo "ERROR: Unable to retrieve a list of YARN Node Manager nodes."
    if [ -n "$YARN_RESOURCE_MANAGER" ]; then
      echo "Try specifying -host."
    else
      echo "Try specifying either -host or -yarnrm."
    fi
  fi
fi

# Still nothing to work with: the user has to name the hosts explicitly.
if [ -z "$SERDE_HOSTLIST" ]; then
  echo "ERROR: The list of hosts serving as data nodes in the cluster is empty."
  exit 1
fi

checkNodesList

#======================================================
# Install or remove the SerDe jars on every host in
# SERDE_HOSTLIST.
#
# The local host (matched by short name against
# LOCALHOST) is handled with cp/rm; every other host is
# reached as ${USER}@<short host name> with scp/ssh.
# Each command is echoed before it runs.  Per host the
# order is: main jar and NLS jar for the Hive directory,
# then main jar and NLS jar for the Map-Reduce directory.
#
# Local path arguments are quoted so that a directory
# containing a space is passed as one word.  SCP_OPTIONS
# and SSH_OPTIONS stay unquoted because they hold several
# options each.  The remote "rm" command string is still
# split by the remote shell.
#======================================================
if [ -z "$SERDE_UNINSTALL" ]; then
  for node in $SERDE_HOSTLIST; do
    for target_dir in "$SERDE_HIVE_DIR" "$SERDE_MR_DIR"; do
      for source_jar in "$SERDE_JAR_LOC" "$SERDE_NLS_JAR_LOC"; do
        if [ "${node%%.*}" == "${LOCALHOST%%.*}" ]; then
          echo "cp ${source_jar} ${target_dir}"
          cp "${source_jar}" "${target_dir}"
        else
          echo "scp $SCP_OPTIONS ${source_jar} ${USER}@${node%%.*}:${target_dir}"
          scp $SCP_OPTIONS "${source_jar}" "${USER}@${node%%.*}:${target_dir}"
        fi
      done
    done
  done
else
  for node in $SERDE_HOSTLIST; do
    for installed_jar in "$SERDE_JAR_HIVE" "$SERDE_NLS_JAR_HIVE" "$SERDE_JAR_MR" "$SERDE_NLS_JAR_MR"; do
      if [ "${node%%.*}" == "${LOCALHOST%%.*}" ]; then
        echo "'rm ${installed_jar}'"
        rm "${installed_jar}"
      else
        echo "ssh ${SSH_OPTIONS} ${USER}@${node%%.*} 'rm ${installed_jar}'"
        ssh ${SSH_OPTIONS} "${USER}@${node%%.*}" "rm ${installed_jar}"
      fi
    done
  done
fi
