#! /bin/sh

# autoban v0.5
# Automatically ban IP addresses
# Copyright (c) 2023 Raphaël Halimi <raphael.halimi@gmail.com>

# TODO function ip2regex (what is this ?)
# TODO function check_ban, check data and ban separately
# TODO use another delimiter (why ?)
# TODO test if sleep is still necessary, and where


#
# Libraries
#

. /lib/firewall/autoban/shell-script-helper
. /lib/firewall/autoban/shell-script-helper-log
. /lib/firewall/autoban/shell-script-helper-arrays


#
# Traps
#

# On SIGHUP, run the reload function defined further down in this file.
# The handler string is only evaluated when the signal is delivered, so it can
# be registered here, before the function itself is defined.
trap reload HUP


#
# Variables
#

# Options defaults
# NOTE: keep every assignment below on its own line, starting at column 0 and
# without a trailing comment: check_int_opts greps this script for "^NAME="
# and reuses everything after the '=' as the fallback value.
# Log file followed with tail when journald is not running
INPUT_FILE="/var/log/syslog"
# Number of existing log lines to read when starting to follow the log
BACK_LOG=0
# Space-separated list of IP addresses whose failures are ignored
WHITELIST=""
# Number of failures that triggers a ban
THRESHOLD=3
# Duration of a ban, in seconds
BAN_TIME=3600
# Maximum size of the dumped array, in bytes (0 means no limit)
MAX_SIZE=0
# Logging switches and target; apart from option parsing they are not read in
# this file (presumably consumed by the sourced log helper - TODO confirm)
LOG_SYSLOG=0
LOG_TOFILE=0
LOG_FILE=""
# When 1, build_regex_table prints what it loaded and exits
CHECK=0
VERBOSE=0
DEBUG=0

# Internal variables
LIB_DIR="/lib/firewall"
ETC_DIR="/etc/firewall"
# Directories searched for *.regex files (ETC_REGEX is preferred over LIB_REGEX)
LIB_REGEX="$LIB_DIR/regex.d"
ETC_REGEX="$ETC_DIR/regex.d"
ETC_RULES="$ETC_DIR/rules.d"
CONFIG_FILE="$ETC_DIR/autoban.conf"
# File to which ban_ip appends one line per ban
BAN_FILE="$ETC_RULES/10-autoban.rule"
# SCRIPT_NAME is not set in this file (presumably provided by the sourced
# shell-script-helper - TODO confirm)
FIFO_FILE="/run/$SCRIPT_NAME.fifo"
# Smallest non-zero value accepted for MAX_SIZE
MIN_SIZE=1024
# Pause (passed to sleep) between two keys in clear_keys
SLEEP=0.001
# Not referenced elsewhere in this file (presumably read by the log helper)
LOG_FACILITY="daemon"


#
# Functions
#

# Process options
# ARGS: none
# Re-parses the command line saved in ARGS (a quoted string, split again with
# eval) and sets the matching option variables. Prints the usage and exits on
# -h (status 0) or on an unknown option (status 1).
process_opts () {
  local OPTION
  # Restart parsing from the first argument: this function is called again on
  # reload, and shells which keep OPTIND global across function calls (bash
  # for instance) would otherwise resume after the last argument and silently
  # ignore every option
  OPTIND=1
  eval "set -- $ARGS"
  while getopts "i:b:w:n:t:m:sfo:cvdh" OPTION ; do
    case $OPTION in
      i) INPUT_FILE="$OPTARG" ;;
      b) BACK_LOG="$OPTARG" ;;
      w) WHITELIST="$OPTARG" ;;
      n) THRESHOLD="$OPTARG" ;;
      t) BAN_TIME="$OPTARG" ;;
      m) MAX_SIZE="$OPTARG" ;;
      s) LOG_SYSLOG=1 ;;
      f) LOG_TOFILE=1 ;;
      # Giving a log file implies logging to file
      o) LOG_TOFILE=1 ; LOG_FILE="$OPTARG" ;;
      c) CHECK=1 ;;
      v) enable_verbose ;;
      d) enable_debug ;;
      h) print_usage ; exit 0 ;;
      *) print_usage ; exit 1 ;;
    esac
  done
}

# Validate every option that must hold an integer
# ARGS: none
# An option failing check_int is reset to the default found in this script:
# the first line starting with "NAME=" (everything after the '=' is reused).
check_int_opts () {
  local OPT VAL DEF
  for OPT in \
    BACK_LOG THRESHOLD BAN_TIME MAX_SIZE \
    LOG_SYSLOG LOG_TOFILE CHECK VERBOSE DEBUG
  do
    # Indirect read of the variable named by OPT
    VAL="$(eval "printf %s \"\${$OPT}\"")"
    check_int "$VAL" && continue
    # Look up the default and strip the "NAME=" part
    DEF="$(grep -m 1 "^$OPT=" "$0")"
    DEF="${DEF#*=}"
    print_message warning "Value '$VAL' invalid for option $OPT (expected integer), using default '$DEF'"
    eval "$OPT=\"$DEF\""
  done
}

# Select the command which will feed the log to the parser
# ARGS: none
# Sets INPUT_CMD: journalctl when a systemd-journald process is found, else
# tail on INPUT_FILE when that file exists; dies when neither is available.
check_data_source () {
  if ! pgrep -f systemd-journald > /dev/null ; then
    # No journal: fall back to the syslog file, if any
    if [ ! -e "$INPUT_FILE" ] ; then
      die "No data source found (no journald nor syslog file found)"
    else
      INPUT_CMD="tail -n ${BACK_LOG:-0} -F $INPUT_FILE"
    fi
  else
    INPUT_CMD="journalctl -q -f -n ${BACK_LOG:-0}"
  fi
  debug_var INPUT_CMD
}

# Enforce the lower bound on MAX_SIZE
# ARGS: none
# A value of 0 (no limit) is left alone; any other value below MIN_SIZE is
# raised to MIN_SIZE, with a warning.
check_max_size () {
  if [ $MAX_SIZE -ne 0 ] ; then
    if [ $MAX_SIZE -lt $MIN_SIZE ] ; then
      print_message warning "Size '$MAX_SIZE' invalid for option MAX_SIZE (too small), using hardcoded minimum '$MIN_SIZE'"
      MAX_SIZE=$MIN_SIZE
    fi
  fi
}

# List preferred files in directories
# ARGS: DIR...
# Specify DIR... in order of preference
# For every "*.regex" file name found under the existing DIRs, prints (one per
# line) the path of that file in the first DIR which contains it. An empty file
# in a preferred DIR prints nothing and hides the same name in the other DIRs.
# Returns 1, with an error message, when none of the DIRs exists.
list_preferred_files () {
  local DIR FILE
  # Keep only the existing directories in the positional parameters, in the
  # original order (the for list is expanded once, before the first shift)
  for DIR ; do
    shift
    if [ -e "$DIR" ] ; then
      set -- "$@" "$DIR"
    fi
  done
  if [ $# -eq 0 ] ; then
    print_message err "list_preferred_files: no directories found"
    return 1
  fi
  # -exec ... + does not run basename at all when nothing is found (no
  # "missing operand" error), and reading line by line keeps paths with
  # spaces in one piece
  find "$@" ! -type d -name "*.regex" -exec basename -a {} + | sort -u | while IFS= read -r FILE ; do
    for DIR ; do
      if [ -e "$DIR/$FILE" ] ; then
        if [ -s "$DIR/$FILE" ] ; then printf "%s\n" "$DIR/$FILE" ; fi
        break
      fi
    done
  done
  return 0
}

# Build regex table
# ARGS: none
# Sets REGEX_FILES (paths returned by list_preferred_files), REGEX_TABLE (their
# lines, comments and blank lines removed, sorted, duplicates dropped) and
# SERVICES (space-separated list of the first word of every table line).
# Dies when the table is empty or when a regex fails the basic syntax test.
# In check mode (CHECK=1), prints what was loaded and exits the script with
# status 0 (all regexes passed) or 1.
build_regex_table () {
  local LINE SERVICE REGEX RES RC
  print_message notice "Building regular expressions table"
  RC=0
  REGEX_FILES="$(list_preferred_files "$ETC_REGEX" "$LIB_REGEX")"
  # REGEX_FILES is left unquoted on purpose (one argument per file);
  # /dev/null stands in when no file was found
  REGEX_TABLE="$(sed -E 's/^#.*$// ; /^[[:blank:]]*$/d' ${REGEX_FILES:-/dev/null} | sort -u)"
  if [ $CHECK -eq 1 ] ; then
    printf "\nFiles:\n%s\n" "$REGEX_FILES"
  fi
  if [ -n "$REGEX_TABLE" ] ; then
    if [ $CHECK -eq 1 ] ; then
      printf "\nRegular expressions:\n"
    fi
    while read -r LINE ; do
      # A table line is "SERVICE REGEX", split on the first space
      SERVICE="${LINE%% *}"
      REGEX="${LINE#* }"
      if [ $CHECK -eq 1 ] ; then
        printf "service='%s' regex='%s'\n" "$SERVICE" "$REGEX"
      fi
      # Very basic test
      # (sed gets an empty input, so only its exit status matters here)
      # NOTE(review): the regex is pasted between '/' delimiters, so a regex
      # containing an unescaped '/' would presumably be reported as an error
      printf %s '' | sed -n -E "s/$REGEX//p"
      RES=$?
      if [ $RES -ne 0 ] ; then
        RC=1
        # NOTE(review): level is spelled "error" here but "err" in most other
        # calls of this file - confirm which ones print_message accepts
        print_message error "regular expression '$REGEX' for service '$SERVICE' returned an error"
      fi
    done << EOF
$(printf "%s\n" "$REGEX_TABLE")
EOF
    SERVICES="$(printf %s "$REGEX_TABLE" | cut -d ' ' -f 1 | sort -u | xargs)"
    if [ $CHECK -eq 1 ] ; then
      # Check mode: print a summary and leave the script here
      printf "\nServices:\n%s\n" "$(printf "%s\n" "$SERVICES" | tr ' ' '\n')"
      if [ $RC -eq 0 ] ; then
        print_escape_sequence green
        printf "\nAll checks passed!\n"
      else
        print_escape_sequence red
        printf "\nError(s) were found.\n"
      fi
      print_escape_sequence reset
      exit $RC
    else
      if [ $RC -eq 0 ] ; then
        print_message notice "Loaded $(printf "%s\n" "$REGEX_TABLE" | wc -l) regex"
        debug_var SERVICES
      else
        die "One or more regular expression(s) produced error(s)"
      fi
    fi
  else
    die "Empty regular expressions table"
  fi
}

# Pad SERVICES and WHITELIST with one space on each side
# ARGS: none
# With the padding, " $ITEM " can be looked up in a list with a plain substring
# parameter expansion. The whitelist goes through xargs first.
format_lists () {
  SERVICES=" ${SERVICES} "
  WHITELIST=" $(printf '%s' "${WHITELIST}" | xargs) "
}

# Dump the whole configuration through debug_var
# ARGS: none
# Does nothing unless DEBUG is 1.
debug_config () {
  [ $DEBUG -eq 1 ] || return 0
  # '$' is passed literally, next to the variable names
  debug_var \
    SCRIPT_NAME '$' ARGS \
    INPUT_FILE BACK_LOG WHITELIST THRESHOLD BAN_TIME MAX_SIZE \
    LOG_SYSLOG LOG_TOFILE LOG_FILE LOG_HOSTNAME \
    VERBOSE DEBUG \
    LIB_DIR ETC_DIR LIB_REGEX ETC_REGEX ETC_RULES \
    CONFIG_FILE BAN_FILE FIFO_FILE \
    MIN_SIZE SLEEP LOG_FACILITY
}

# Store the current time in TIMESTAMP_NOW
# ARGS: none
# The value is the output of "date +%s".
set_timestamp_now () {
  TIMESTAMP_NOW="$(date '+%s')"
}

# Recover previous bans from BAN_FILE
# ARGS: none
# Every line written by ban_ip ("ipvN && ban_ip IP<blank># SERVICE|...|TIME")
# is parsed again: a ban still running is put back in the array with a counter
# equal to THRESHOLD, an expired one is lifted with unban_ip. Other lines are
# ignored. Does nothing when BAN_FILE cannot be read (first run for instance).
recover_bans () {
  local LINE DATA IP COMMENT SERVICE TIMESTAMP_LAST
  set_timestamp_now
  print_message notice "Recovering data from previous sessions"
  if [ ! -r "$BAN_FILE" ] ; then
    print_message debug "ban file '$BAN_FILE' not readable, nothing to recover"
    return 0
  fi
  while read -r LINE ; do
    # Reduce a ban line to "IP COMMENT"; any other line is left unchanged
    DATA="$(printf %s "$LINE" | sed -E 's/^ipv. && ban_ip (.*)[[:blank:]]+# (.*)$/\1 \2/')"
    if [ "$DATA" != "$LINE" ] ; then
      IP="${DATA% *}"
      COMMENT="${DATA#* }"
      SERVICE="${COMMENT%%|*}"
      TIMESTAMP_LAST="${COMMENT##*|}"
      # The timestamp is used in an arithmetic expansion below, where anything
      # but digits is either misread or a fatal syntax error
      case "$TIMESTAMP_LAST" in
        ""|*[!0123456789]*)
          print_message warning "Ignoring malformed ban file entry '$LINE'"
          continue
          ;;
      esac
      if [ $((TIMESTAMP_LAST+BAN_TIME)) -ge $TIMESTAMP_NOW ] ; then
        print_message notice "$IP banned on $(date -R -d@$TIMESTAMP_LAST) ($SERVICE)"
        aa_key_set "$(ip2key "$IP")" "$THRESHOLD|$COMMENT"
      else
        unban_ip "$IP"
      fi
    fi

  # The file is read entirely before the loop starts, since unban_ip edits it
  done << EOF
$(cat "$BAN_FILE")
EOF
}

# Prepare FIFO
# ARGS: none
# Creates FIFO_FILE (mode 600) unless a FIFO already exists at that path, then
# starts INPUT_CMD in the background with its output sent to the FIFO. The PID
# of the background job is stored in INPUT_PID. Dies when the FIFO cannot be
# created.
prepare_fifo () {
  # Create FIFO
  if [ ! -p "$FIFO_FILE" ] ; then
    print_message debug "create FIFO file '$FIFO_FILE'"
    # Carrying on without a FIFO would make the redirection below fail, or
    # write to a regular file
    mkfifo -m 600 "$FIFO_FILE" || die "Cannot create FIFO file '$FIFO_FILE'"
  fi
  # Feed the FIFO with the system log
  print_message debug "run $INPUT_CMD > \"$FIFO_FILE\" &"
  # INPUT_CMD is left unquoted on purpose: it holds a command and its arguments
  $INPUT_CMD 2>/dev/null > "$FIFO_FILE" &
  INPUT_PID=$!
  debug_var INPUT_PID
}

# Stop the log feeder and remove the FIFO
# ARGS: none
# Only acts when running as root (a normal user may run the script with -h
# and -c). The feeder is killed only if the process found under INPUT_PID
# still bears the name of the command stored in INPUT_CMD.
cleanup () {
  [ "$(id -u)" = "0" ] || return 0
  print_message debug "clean up"
  if [ -n "$INPUT_PID" ] ; then
    if [ "$(ps -q "$INPUT_PID" -o comm=)" != "${INPUT_CMD%% *}" ] ; then
      print_message debug "PID '$INPUT_PID' not found"
    else
      print_message debug "kill PID '$INPUT_PID' (${INPUT_CMD%% *})"
      kill $INPUT_PID
    fi
  fi
  delete_files "$FIFO_FILE"
}

# Reload configuration
# ARGS: none
# Handler of the HUP trap: stops the log feeder, resets the options to their
# defaults, reads the configuration file and the saved command line again,
# reruns the startup checks, then restarts the feeder and the main loop.
# NOTE(review): main_loop is started from inside the signal handler, so every
# SIGHUP appears to stack one more main_loop on top of the interrupted one -
# confirm this is intended.
reload () {
  print_message notice "Received SIGHUP, reloading"

  # Cleanup and redo all startup steps, except:
  # - force BACK_LOG=0 (no need to re-read log)
  # - force CHECK=0 (in case user sillily sets it in the config file)
  # - don't recover bans (array still in memory)

  # Stop background processes, remove files
  cleanup
  close_logfile

  # Options defaults
  # (same values as the defaults at the top of the file; BACK_LOG and CHECK
  # are forced further down)
  INPUT_FILE="/var/log/syslog"
  WHITELIST=""
  THRESHOLD=3
  BAN_TIME=3600
  MAX_SIZE=0
  LOG_SYSLOG=0
  LOG_TOFILE=0
  LOG_FILE=""
  VERBOSE=0
  DEBUG=0

  # Read configuration file
  [ -e "$CONFIG_FILE" ] && . "$CONFIG_FILE"

  # Process command-line options
  # (parsed again from ARGS, saved at startup)
  process_opts

  # Force options
  BACK_LOG=0
  CHECK=0

  # Checks
  check_int_opts
  init_log
  check_dep firewall
  check_data_source
  check_max_size
  build_regex_table
  format_lists
  debug_config

  # Restart
  prepare_fifo
  main_loop
}

# Check if IP address is IPv4 or IPv6
# ARGS: IP
# Prints 4 when IP is made only of digits and dots, 6 when it is made only of
# hexadecimal digits and colons. Anything else (empty string included) gets an
# error message and return code 1. This is a character check only, not a full
# address syntax validation.
ip_version () {
  local IP
  IP="$1"
  # Characters are listed one by one so that the match does not depend on the
  # locale; matching in the shell also rejects backslashes and multi-line
  # strings, and saves two processes per call
  case "$IP" in
    "")
      ;;
    *[!0123456789.]*)
      case "$IP" in
        *[!0123456789abcdefABCDEF:]*) ;;
        *) printf %d 6 ; return 0 ;;
      esac
      ;;
    *)
      printf %d 4 ; return 0
      ;;
  esac
  print_message err "String '$IP' is neither an IPv4 nor an IPv6 address" ; return 1
}

# Turn an IP address into an array key
# ARGS: IP
# The key is the IP version, an underscore, then the address with every dot
# and colon replaced by an underscore. Returns 1 with an error message when
# ip_version does not answer 4 or 6.
ip2key () {
  local IP IPV
  IP="$1"
  IPV="$(ip_version "$IP")"
  if [ "$IPV" = "4" ] || [ "$IPV" = "6" ] ; then
    printf '%s_%s' "$IPV" "$IP" | tr '.:' '__'
  else
    print_message err "String '$IP' is neither an IPv4 nor an IPv6 address"
    return 1
  fi
}

# Convert key name to IP
# ARGS: KEY
# Reverse of ip2key: strips the "4_" or "6_" prefix and turns the remaining
# underscores back into dots (IPv4) or colons (IPv6). Returns 1 with an error
# message when KEY has neither prefix.
key2ip () {
  local KEY
  KEY="$1"
  case "$KEY" in
    4_*) printf %s "${KEY#4_}" | tr _ . ;;
    6_*) printf %s "${KEY#6_}" | tr _ : ;;
    *) print_message err "Key '$KEY' can't be translated to an IP address" ; return 1 ;;
  esac
}

# Ban IP
# ARGS: IP COMMENT
# Runs the firewall command banning IP, then appends the matching rule to
# BAN_FILE, followed by COMMENT (which recover_bans parses back as
# "SERVICE|...|TIMESTAMP"). A failure of the firewall command is logged but
# the rule is still recorded. Returns 1 without doing anything when IP is not
# accepted by ip_version.
ban_ip () {
  local IP COMMENT IPV
  IP="$1"
  COMMENT="$2"
  # IP comes from log lines and is pasted into the firewall command string and
  # into the ban file: never go on with a string which failed the check
  IPV="$(ip_version "$IP")" || return 1
  print_message debug "run $FIREWALL_BIN -e \"if ipv$IPV ; then ban_ip $IP ; fi\""
  $FIREWALL_BIN -e "if ipv$IPV ; then ban_ip $IP ; fi" || print_message err "$FIREWALL_BIN command exited with return code $?"
  print_message debug "add IP '$IP' to ban file"
  printf "ipv%d && ban_ip %s\t# %s\n" "$IPV" "$IP" "$COMMENT" >> "$BAN_FILE"
}

# Unban IP
# ARGS: IP
# Runs the firewall command lifting the ban on IP, then deletes from BAN_FILE
# the lines where IP appears between blanks. A failure of the firewall command
# is logged but the ban file is still updated. Returns 1 without doing
# anything when IP is not accepted by ip_version.
unban_ip () {
  local IP IPV PATTERN
  IP="$1"
  # IP is pasted into the firewall command string and into a sed program:
  # never go on with a string which failed the check
  IPV="$(ip_version "$IP")" || return 1
  print_message notice "Removing ban for IP '$IP'"
  print_message debug "run $FIREWALL_BIN -e \"if ipv$IPV ; then unban_ip $IP ; fi\""
  $FIREWALL_BIN -e "if ipv$IPV ; then unban_ip $IP ; fi" || print_message err "$FIREWALL_BIN command exited with return code $?"
  print_message debug "remove IP '$IP' from ban file"
  if [ -e "$BAN_FILE" ] ; then
    # Dots must match literally, not as "any character"
    PATTERN="$(printf %s "$IP" | sed 's/\./\\./g')"
    sed -E -i "/[[:blank:]]$PATTERN[[:blank:]]/d" "$BAN_FILE"
  fi
}

# Expire key (forget IP)
# ARGS: KEY
# Removes KEY from the array, then lifts the ban of the matching IP address.
expire_key () {
  local KEY="$1" IP
  # The array only knows keys, the log and the firewall want the address
  IP="$(key2ip "$KEY")"
  print_message info "Deleting expired data about IP '$IP'"
  aa_key_remove "$KEY"
  unban_ip "$IP"
}

# Clear expired keys
# ARGS: none
# Walks through the array dump ("KEY=COUNTER|...|TIMESTAMP" lines), sorted on
# the last field, and expires every key whose TIMESTAMP is older than BAN_TIME
# (compared to TIMESTAMP_NOW). When MAX_SIZE is set and the dump is at least
# that big, keys whose counter is below THRESHOLD are expired as well, oldest
# first, until the dump is smaller than MAX_SIZE.
clear_keys () {
  local TRIM ARRAY SIZE LINE KEY COUNTER TIMESTAMP EXPIRED
  print_message debug "clear expired keys"
  TRIM=0
  # Dump array, sorted by timestamp
  ARRAY="$(aa_dump | sort -t '|' -k 4)"
  if [ -n "$ARRAY" ] ; then
    if [ $MAX_SIZE -gt 0 ] ; then
      SIZE=$(printf %s "$ARRAY" | wc -c)
      print_message debug "array $SIZE bytes"
      if [ $SIZE -ge $MAX_SIZE ] ; then
        print_message info "Array full ($SIZE/$MAX_SIZE bytes), trimming..."
        TRIM=1
      fi
    fi
    while read -r LINE ; do
      KEY="${LINE%%=*}"
      COUNTER="${LINE#*=}" ; COUNTER="${COUNTER%%|*}"
      TIMESTAMP="${LINE##*|}"
      debug_var LINE KEY COUNTER TIMESTAMP
      EXPIRED=0
      # Remove expired keys
      if [ $TRIM -eq 1 ] && [ $COUNTER -lt $THRESHOLD ] ; then
        print_message debug "trim ${#LINE} bytes"
        expire_key "$KEY"
        EXPIRED=1
        SIZE=$(aa_dump | wc -c)
        print_message debug "array $SIZE bytes"
        if [ $SIZE -lt $MAX_SIZE ] ; then
          print_message debug "done trim"
          TRIM=0
        fi
      fi
      if [ $((TIMESTAMP+BAN_TIME)) -le $TIMESTAMP_NOW ] ; then
        # A key already removed by the trim must not be expired a second time
        if [ $EXPIRED -eq 0 ] ; then
          expire_key "$KEY"
        fi
      else
        if [ $TRIM -eq 0 ] ; then
          # Break when we reach a key recent enough
          break
        fi
      fi
      # We have to sleep a little bit or else systemd won't be happy
      sleep $SLEEP
    done << EOF
$(printf "%s" "$ARRAY")
EOF
  fi
  print_message debug "done clear"
}

# Show the usage message
# ARGS: none
# The defaults shown are the current values of the option variables.
print_usage () {
  # Synopsis, then the title of the options list
  printf "Usage: %s [OPTION]...\n\nOPTIONS:\n" "$SCRIPT_NAME"

  # Options taking a value
  print_option "-i INPUT_FILE" \
    "Input file (default: '$INPUT_FILE')"
  print_option "-b BACK_LOG" \
    "Lines to read back from log (default: $BACK_LOG)"
  print_option "-w WHITELIST" \
    "Whitelist (space-separated list of IPs, default: '$WHITELIST')"
  print_option "-n THRESHOLD" \
    "Number of errors to trigger a ban (default: $THRESHOLD)"
  print_option "-t BAN_TIME" \
    "Duration of a ban (in seconds, default: $BAN_TIME)"
  print_option "-m MAX_SIZE" \
    "Max array size (in bytes, 0 for no limit, default: $MAX_SIZE)"

  # Logging
  print_option "-s" "Log to syslog (default: $LOG_SYSLOG)"
  print_option "-f" "Log to file (default: $LOG_TOFILE)"
  print_option "-o LOG_FILE" \
    "Log file (default: '${LOG_FILE:-/var/log/$SCRIPT_NAME.log}')"

  # Modes
  print_option "-c" "Check"
  print_option "-v" "Verbose mode"
  print_option "-d" "Debug mode"
  print_option "-h" "Print this help message"
}


#
# Configuration file
#

# Sourced before the command line is parsed, so that command-line options
# override the values set in the configuration file
[ -e "$CONFIG_FILE" ] && . "$CONFIG_FILE"


#
# Options processing
#

# The command line is saved in ARGS (through the quote helper) because
# process_opts parses it from there, at startup and again on reload
ARGS="$(quote "$@")"
process_opts


#
# Checks
#

check_int_opts
# Check mode may be run by a normal user: skip the root-only steps
if [ $CHECK -eq 0 ] ; then
  root_only
  lock_script
  init_log
fi
check_dep firewall
check_data_source
check_max_size
# In check mode, build_regex_table exits the script: nothing below runs then
build_regex_table
format_lists
debug_config


#
# Main
#

recover_bans
prepare_fifo

# Enclose main loop in a function, so that it can be reloaded with SIGHUP
# ARGS: none
# Reads log lines from FIFO_FILE. For each line from a known service, tries the
# regexes of that service; on a match, the first sub-expression is taken as the
# offending IP address, its failure counter is incremented in the array, and
# the address is banned when the counter reaches THRESHOLD. Expired keys are
# cleared after every line. Returns only when reading from the FIFO fails.
main_loop () {

  # Feed the parser with the FIFO
  while read -r LOG_LINE ; do
    set_timestamp_now
    REGEX_SERVICE=""

    # In debug mode, ignore our own lines
    # (lines containing " SCRIPT_NAME[PID]: ")
    if [ $DEBUG -eq 1 ] ; then
      if [ "$LOG_LINE" != "${LOG_LINE#* $SCRIPT_NAME\[$$]: *}" ] ; then continue ; fi
    fi

    # Extract tag and message from log line
    # (the first four space-separated fields are dropped, the tag runs up to
    # the next ": ", the message is everything after it)
    LOG_LINE="$(printf %s "$LOG_LINE" | sed -n -E 's/^[^ ]+ +[^ ]+ +[^ ]+ +[^ ]+ +([^:]+): (.*)$/\1 \2/p')"
    if [ -z "$LOG_LINE" ] ; then
      # NOTE(review): level is spelled "error" here but "err" in most other
      # calls of this file - confirm which ones print_message accepts
      print_message error "cannot extract info from log line, sed s command failed"
      continue
    else
      # Keep the bare service name: drop a "[PID]" suffix and anything after
      # a colon
      SERVICE="${LOG_LINE%% *}" ; SERVICE="${SERVICE%[*}" ; SERVICE="${SERVICE%:*}"
      MESSAGE="${LOG_LINE#* }"
      debug_var SERVICE MESSAGE
    fi

    # If tag is in known list, find related regex
    # (substring match, SERVICES being padded with spaces by format_lists)
    if [ "$SERVICES" != "${SERVICES#* $SERVICE *}" ] ; then
      print_message debug "regex found for service '$SERVICE'"
      REGEX_SERVICE="$(printf %s "$REGEX_TABLE" | grep "^$SERVICE ")"
    else
      print_message debug "skip unknown service '$SERVICE'" ; clear_keys ; continue
    fi

    # Compare the line to each regex in $REGEX_TABLE
    # (only the lines selected for this service, as "SERVICE REGEX")
    while read -r REGEX ; do
      REGEX="${REGEX#* }"
      debug_var REGEX

      # Try to match line against regex
      # If the regexp starts with the sub-expression, we have to match from the start
      # In both cases the first sub-expression is what ends up in IP
      if [ "$(printf %.1s "$REGEX")" = "(" ] ; then
        IP="$(printf %s "$MESSAGE" | sed -n -E "s/^$REGEX.*$/\1/p")"
      else
        IP="$(printf %s "$MESSAGE" | sed -n -E "s/^.*$REGEX.*$/\1/p")"
      fi

      # If the regex matched, proceed
      if [ -n "$IP" ] ; then

        print_message debug "match found"
        debug_var IP

        # Stop processing if IP address is whitelisted
        if [ "$WHITELIST" != "${WHITELIST#* $IP *}" ] ; then
          print_message info "Ignoring failure from whitelisted IP '$IP' on service '$SERVICE'"
          break
        else
          # Get the array key for this IP
          KEY="$(ip2key "$IP")"
          debug_var KEY

          # If IP has been seen already, extract known data
          # (stored as "COUNTER|SERVICE|TIMESTAMP_ORIG|TIMESTAMP_NOW")
          DATA="$(aa_key_get "$KEY")"
          if [ -n "$DATA" ] ; then
            print_message debug "known IP"
            TIMESTAMP_ORIG="$(printf %s "$DATA" | cut -d '|' -f 3)"
            COUNTER="$(($(printf %s "$DATA" | cut -d '|' -f 1)+1))"
            debug_var DATA TIMESTAMP_ORIG COUNTER

            # If IP has already been banned, stop processing
            # (its stored counter is already equal to THRESHOLD)
            if [ $COUNTER -gt $THRESHOLD ] ; then
              print_message debug "ignore already banned IP '$IP'"
              break
            fi

          else
            print_message debug "new IP"
            TIMESTAMP_ORIG=$TIMESTAMP_NOW
            COUNTER=1
            debug_var TIMESTAMP_ORIG COUNTER
          fi
          debug_var TIMESTAMP_NOW

          # Log data with new COUNTER and updated TIMESTAMP
          aa_key_set "$KEY" "$COUNTER|$SERVICE|$TIMESTAMP_ORIG|$TIMESTAMP_NOW"

          # Report
          print_message notice "Failure #$COUNTER from '$IP' on service '$SERVICE'"

          # If COUNTER equals threshold, ban IP
          if [ $COUNTER -eq $THRESHOLD ] ; then
            print_message debug "ban IP '$IP'"
            print_message notice "Banning $IP for ${BAN_TIME}s ($COUNTER failures in $((TIMESTAMP_NOW-TIMESTAMP_ORIG))s)"
            ban_ip "$IP" "$SERVICE|$TIMESTAMP_ORIG|$TIMESTAMP_NOW"
          fi

          # Break from loop, since a line can't match more than one regex
          break
        fi

      fi

    done << EOF
$(printf "%s\n" "$REGEX_SERVICE")
EOF

    clear_keys

  done < "$FIFO_FILE"
}

# Run main loop
main_loop

# We shouldn't get here
# (main_loop only returns once reading from the FIFO fails, end-of-file
# included, hence the guess about the feeder in the message below)
print_message crit "Exited main loop, process '${INPUT_CMD%% *}' probably died"
exit 1
