#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
apache_suspicious_scan_live.py
Scanner für Apache-Access-Logs mit Live-Blocking, Rate-Limit, SQLi-Detection,
Whitelist, nft/iptables, per-IP Counter ("Hits") und HTML-Statusseite.
"""
import argparse, glob, gzip, os, re, sys, json, time, shutil, subprocess, ipaddress, urllib.parse
from datetime import datetime, timedelta
from collections import defaultdict, Counter, deque

# ---------- Heuristics ----------
# Thresholds read by compute_ip_score(); each one gates a fixed score bonus.
THRESH_UNIQUE_PHP_PER_IP     = 20    # distinct *.php paths requested by one IP
THRESH_404_RATIO             = 0.5   # share of 4xx responses (all 400-499, not only 404)
MIN_REQUESTS_FOR_404_RATIO   = 20    # the 4xx ratio is only evaluated from this many requests on
THRESH_REQS_PER_SEC          = 0.5   # average req/s between an IP's first and last timestamp
MIN_REQUESTS_FOR_RATE        = 10    # the rate check is only evaluated from this many requests on
THRESH_UNIQUE_UA_PER_IP      = 5     # distinct User-Agent strings seen for one IP
LONG_URL_LENGTH              = 1000  # URIs longer than this are flagged as "long_uri"

# Score added once per IP when the corresponding flag counter is >= 1.
# "sensitive_3" replaces "sensitive_1" once an IP has three or more sensitive hits.
WEIGHTS = {
    "sqli":         5,
    "lfi_rfi":      4,
    "sensitive_1":  2,
    "sensitive_3":  3,
    "binary":       2,
    "long_uri":     1,
    "non_print":    1,
}

# Path traversal, PHP stream wrappers, well-known system files and PHP code-execution calls.
LFI_RFI_RE = re.compile(r"(\.\./|%2e%2e|php://|/etc/passwd|/proc/self/environ|/etc/shadow|base64_decode\(|eval\(|system\()", re.IGNORECASE)
# File and tool names that scanners commonly probe for (matched anywhere in the URI).
SENSITIVE_FILES_RE = re.compile(r"(?i)(?:\.env|\.git|composer\.json|wp-login\.php|xmlrpc\.php|phpmyadmin|adminer|\.sql|\.bak|\.old|backup|config\.php)")
# ASCII control characters except TAB, LF and CR.
NON_PRINTABLE_RE   = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]")

# One access-log line: host, ident, auth user, [date], "request", status, size and an
# optional trailing "referer" "user-agent" pair.
LOG_RE   = re.compile(r'(?P<ip>\S+) (?P<ident>\S+) (?P<auth>\S+) \[(?P<date>.+?)\] "(?P<req>.+?)" (?P<status>\d{3}) (?P<size>\S+)(?: "(?P<referer>.*?)" "(?P<agent>.*?)")?')
# strptime format for the bracketed date field, including the numeric UTC offset.
DATE_FMT = "%d/%b/%Y:%H:%M:%S %z"

# Defaults for the HTML status page (copied into STATUS_PAGE_PATH / STATUS_INTERVAL below).
DEFAULT_STATUS_PAGE     = "/var/www/html/security_scanner/secscanstat.html"
DEFAULT_STATUS_INTERVAL = 60  # seconds

# ---------- Globals ----------
# Per-IP aggregate, created lazily on first access. process_parsed() fills it and
# compute_ip_score() reads it. "req_lines" keeps every parsed entry for the IP.
ip_stats = defaultdict(lambda: {
    "count": 0, "first_ts": None, "last_ts": None,
    "paths": Counter(), "unique_php": set(), "statuses": Counter(),
    "uas": Counter(), "req_lines": [], "flags": Counter(),
})
suspicious_events = []   # [(ip, reason, detail, when)]
total_lines_attempted = 0   # every line read by scan_files(), parsable or not
processed_entries = 0       # entries that went through process_parsed()

# per-IP Sliding Window (rate limit)
# Holds time.time() stamps of recent requests; follow_and_live() prunes entries older
# than the rate window.
ip_recent = defaultdict(lambda: deque())

# firewall state
blocked_ips     = set()
blocked_backend = {}  # ip -> "nft" | "iptables"

# Names of the nftables objects managed by ensure_nft_infra().
NFT_TABLE  = "apache_scan"
NFT_FAMILY = "inet"
NFT_SET    = "bad_ips"
NFT_CHAIN  = "input_drop"
PERIP_CHAIN= "per_ip_drop"   # per-IP counter rules
# Comment attached to iptables rules so this tool can find its own rules again.
IPT_COMMENT= "apache-scan"

# FW counters (polled)
fw_hits = {}                # ip -> packets
FW_POLL_INTERVAL = 60       # seconds between poll_firewall_counters() runs in live mode
_last_fw_poll = 0.0         # time.time() of the last poll

# status page
STATUS_PAGE_PATH  = DEFAULT_STATUS_PAGE
STATUS_INTERVAL   = DEFAULT_STATUS_INTERVAL
_last_status_write= 0.0     # time.time() of the last successful page write

# whitelist
# In "ignore" mode whitelisted IPs are skipped entirely (not counted, not scored).
# In any other mode they are still processed and scored, but never blocked.
WHITELIST_MODE = "block-only"  # or "ignore"
_whitelist_networks = []    # ipaddress network objects (prefix length != 32)
_whitelist_ips      = set() # single addresses as normalized strings

# ---------- utils ----------
def have_cmd(cmd):
    """Return True when *cmd* resolves to an executable on the current PATH."""
    located = shutil.which(cmd)
    return located is not None
def run(cmd):
    """Execute *cmd* (an argv list) and capture its output.

    Returns a ``(returncode, stdout, stderr)`` tuple; both streams are decoded
    as text and stripped of surrounding whitespace.
    """
    finished = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return finished.returncode, finished.stdout.strip(), finished.stderr.strip()

def open_log(path):
    """Open a log file for text reading; ``.gz`` files are decompressed on the fly.

    Undecodable bytes are replaced rather than raising, so one bad line cannot
    abort a scan.
    """
    if path.endswith(".gz"):
        return gzip.open(path, "rt", encoding="utf-8", errors="replace")
    return open(path, "r", encoding="utf-8", errors="replace")

def parse_line(line):
    """Parse one access-log line into a dict, or return None if it does not match LOG_RE.

    Returned keys: ip, method, uri, proto, status (int), size (int or None),
    referer, agent, ts (aware datetime or None), raw_req, raw_line.

    Malformed sub-fields degrade instead of raising: a request that is not
    "METHOD URI PROTO" yields empty method/uri/proto, an unparsable date yields
    ts=None, and a size that is "-" or not an integer yields size=None. This
    matters because an exception here would abort the whole file in scan_files()
    and end the live loop in follow_and_live().
    """
    m = LOG_RE.match(line)
    if not m:
        return None
    gd = m.groupdict()

    req_parts = gd["req"].split(" ", 2)
    if len(req_parts) == 3:
        method, uri, proto = req_parts
    else:
        # e.g. a TLS handshake or other garbage sent to the HTTP port
        method, uri, proto = "", "", ""

    try:
        ts = datetime.strptime(gd["date"], DATE_FMT)
    except ValueError:
        ts = None

    # LOG_RE only guarantees a non-blank token here, not a number.
    try:
        size = None if gd["size"] == "-" else int(gd["size"])
    except ValueError:
        size = None

    return {"ip": gd["ip"], "method": method, "uri": uri, "proto": proto,
            "status": int(gd["status"]), "size": size,
            "referer": gd.get("referer") or "-", "agent": gd.get("agent") or "-",
            "ts": ts, "raw_req": gd["req"], "raw_line": line.rstrip("\n")}

def ts_to_str(ts):
    """Format a datetime in the local timezone; a falsy value renders as "-"."""
    if not ts:
        return "-"
    local_ts = ts.astimezone()
    return local_ts.strftime("%Y-%m-%d %H:%M:%S %Z")
def flag(ip, reason, detail=None):
    """Record a suspicious event for *ip*, stamped with the current local time, and echo it."""
    when = ts_to_str(datetime.now().astimezone())
    event = (ip, reason, detail, when)
    suspicious_events.append(event)
    print(f"[FLAG] {ip} => {reason} | {detail}")

# ---------- whitelist ----------
def _normalize_ipv4(ip_str):
    try: return str(ipaddress.ip_address(ip_str))
    except Exception: return None

def _expand_to_network(s):
    s = s.strip()
    # range A.B.C.D-E.F.G.H
    if "-" in s:
        try:
            lhs, rhs = s.split("-",1)
            start = ipaddress.ip_address(lhs.strip())
            end   = ipaddress.ip_address(rhs.strip())
            if int(end) >= int(start):
                nets = list(ipaddress.summarize_address_range(start, end))
                if len(nets)==1: return nets[0]
        except Exception: pass
    # cidr shorthand "172.16/12" or "10/8"
    m = re.match(r"^(\d{1,3})(?:\.(\d{1,3}))?\/(\d{1,2})$", s)
    if m:
        a=m.group(1); b=m.group(2); mask=int(m.group(3))
        base = f"{a}.0.0.0/{mask}" if b is None else f"{a}.{b}.0.0/{mask}"
        try:
            net=ipaddress.ip_network(base, strict=False)
            if isinstance(net, ipaddress.IPv4Network): return net
        except Exception: pass
    # wildcards
    if "*" in s:
        parts=s.split("."); fixed=[p for p in parts if p!="*"]
        if len(fixed)==3: return ipaddress.ip_network(".".join(fixed)+".0/24", strict=False)
        if len(fixed)==2:
            ab=".".join(fixed)
            return ipaddress.ip_network("172.16.0.0/12" if ab=="172.16" else ab+".0.0/16", strict=False)
        if len(fixed)==1: return ipaddress.ip_network(fixed[0]+".0.0.0/8", strict=False)
    # CIDR or IP
    try:
        net=ipaddress.ip_network(s, strict=False)
        if isinstance(net, ipaddress.IPv4Network): return net
    except Exception: pass
    try:
        ip=ipaddress.ip_address(s)
        if isinstance(ip, ipaddress.IPv4Address): return ipaddress.ip_network(str(ip)+"/32", strict=False)
    except Exception: pass
    # shorthands
    parts=s.split(".")
    if len(parts)==3 and all(p.isdigit() for p in parts): return ipaddress.ip_network(s+".0/24", strict=False)
    if len(parts)==2 and all(p.isdigit() for p in parts):
        return ipaddress.ip_network("172.16.0.0/12" if s=="172.16" else s+".0.0/16", strict=False)
    if len(parts)==1 and parts[0].isdigit(): return ipaddress.ip_network(parts[0]+".0.0.0/8", strict=False)
    return None

def load_whitelist(path):
    """Load the whitelist file at *path* into the module-level whitelist state.

    Blank lines and lines starting with "#" are skipped. Entries that expand to
    a /32 network are stored as single IPs, other networks are stored as
    networks, and anything that only parses as a plain address is stored as an
    IP. A missing or empty *path* leaves the whitelist empty.
    """
    global _whitelist_networks,_whitelist_ips
    networks = []
    single_ips = set()
    if path and os.path.isfile(path):
        with open(path,"r",encoding="utf-8",errors="replace") as handle:
            for raw_entry in handle:
                entry = raw_entry.strip()
                if not entry or entry.startswith("#"):
                    continue
                expanded = _expand_to_network(entry)
                if not expanded:
                    # Not a network notation; fall back to plain address parsing.
                    plain = _normalize_ipv4(entry)
                    if plain:
                        single_ips.add(plain)
                elif expanded.prefixlen == 32:
                    single_ips.add(str(expanded.network_address))
                else:
                    networks.append(expanded)
    _whitelist_networks = networks
    _whitelist_ips = single_ips

def ip_is_whitelisted(ip):
    """Return True if *ip* is a whitelisted address or lies inside a whitelisted network.

    Strings that are not valid IP addresses are never whitelisted.
    """
    try:
        candidate = ipaddress.ip_address(ip)
    except Exception:
        return False

    if str(candidate) in _whitelist_ips:
        return True

    def _covers(network):
        # A failing membership test counts as "not contained".
        try:
            return candidate in network
        except Exception:
            return False

    return any(_covers(network) for network in _whitelist_networks)

# ---------- SQLi normalization ----------
def url_unquote_safe(s: str) -> str:
    """Percent-decode *s* twice ("+" becomes a space) to undo double encoding.

    Returns the input unchanged if decoding raises.
    """
    try:
        first_pass = urllib.parse.unquote_plus(s)
        second_pass = urllib.parse.unquote_plus(first_pass)
    except Exception:
        return s
    return second_pass

def strip_sql_comments(s: str) -> str:
    """Replace SQL comments in *s* with a single space each.

    Handles ``/* ... */`` blocks, the same block with percent-encoded slashes
    (``%2f* ... *%2f``), and ``--`` comments running to the end of the line.
    """
    substitutions = (
        (r"/\*.*?\*/", re.S),
        (r"%2f\*.*?\*%2f", re.I|re.S),
        (r"--[^\n\r]*", 0),
    )
    cleaned = s
    for pattern, pattern_flags in substitutions:
        cleaned = re.sub(pattern, " ", cleaned, flags=pattern_flags)
    return cleaned

def normalize_for_sqli(s: str) -> str:
    """Reduce a URI to lowercase words separated by single spaces.

    The input is URL-decoded and stripped of SQL comments first; then every run
    of non-alphanumeric characters (underscore included) becomes one space.
    """
    decoded = url_unquote_safe(s)
    without_comments = strip_sql_comments(decoded)
    lowered = without_comments.lower()
    collapsed = re.sub(r"[\W_]+"," ",lowered)
    return collapsed.strip()

# ---------- scoring ----------
def compute_ip_score(ip):
    """Compute the suspicion score for one IP from its accumulated statistics.

    Returns ``(score, reasons)`` where *reasons* is a list of "key=value"
    strings in a fixed order. An IP without statistics scores ``(0, [])``.
    """
    stats = ip_stats.get(ip)
    if not stats:
        return 0, []

    total = 0
    why = []
    n_requests = stats["count"]

    # Many distinct PHP files probed.
    php_seen = len(stats["unique_php"])
    if php_seen >= THRESH_UNIQUE_PHP_PER_IP:
        total += 5
        why.append(f"many_php_files={php_seen}")

    # High share of 4xx responses, judged only on a minimum sample.
    if n_requests >= MIN_REQUESTS_FOR_404_RATIO:
        client_errors = sum(hits for code, hits in stats["statuses"].items() if 400 <= code < 500)
        ratio = client_errors / n_requests
        if ratio >= THRESH_404_RATIO:
            total += 4
            why.append(f"4xx_ratio={ratio:.2f}")

    # Average request rate between the first and the last timestamp seen.
    first_seen = stats["first_ts"]
    last_seen = stats["last_ts"]
    if first_seen and last_seen and last_seen > first_seen:
        seconds = (last_seen - first_seen).total_seconds()
        rate = n_requests / seconds if seconds > 0 else n_requests
        if n_requests >= MIN_REQUESTS_FOR_RATE and rate >= THRESH_REQS_PER_SEC:
            total += 4
            why.append(f"rps={rate:.2f}")

    ua_seen = len(stats["uas"])
    if ua_seen >= THRESH_UNIQUE_UA_PER_IP:
        total += 2
        why.append(f"unique_uas={ua_seen}")

    path_seen = len(stats["paths"])
    if path_seen > 200:
        total += 2
        why.append(f"unique_paths={path_seen}")

    denied = stats["statuses"].get(401, 0) + stats["statuses"].get(403, 0)
    if denied > 50:
        total += 1
        why.append(f"auth_failures={denied}")

    # Per-request flags: each kind adds its weight once, however often it fired.
    flag_counts = stats["flags"]
    for kind in ("sqli", "lfi_rfi", "sensitive", "binary", "long_uri", "non_print"):
        hits = flag_counts.get(kind, 0)
        if hits < 1:
            continue
        if kind == "sensitive":
            weight_key = "sensitive_3" if hits >= 3 else "sensitive_1"
        else:
            weight_key = kind
        total += WEIGHTS[weight_key]
        why.append(f"{kind}={hits}")

    return total, why

def compute_scores():
    """Score every known IP and return ``{ip: (score, request_count, reasons)}``.

    IPs scoring 0 are left out; in "ignore" whitelist mode whitelisted IPs are
    left out as well.
    """
    skip_whitelisted = WHITELIST_MODE == "ignore"
    scored = {}
    for ip in ip_stats:
        if skip_whitelisted and ip_is_whitelisted(ip):
            continue
        points, why = compute_ip_score(ip)
        if points <= 0:
            continue
        scored[ip] = (points, ip_stats[ip]["count"], why)
    return scored

# ---------- firewall infra & counters ----------
def ensure_nft_infra():
    """Create the nftables table, chains and set if missing, then rebuild the base chain.

    The base chain is flushed and refilled on every call so that the jump into
    the per-IP chain always precedes the set-based drop rule.

    Raises:
        RuntimeError: if any required ``nft add ...`` command fails.
    """
    scope = [NFT_FAMILY, NFT_TABLE]

    def _exists(kind, *names):
        status, _out, _err = run(["nft", "list", kind] + scope + list(names))
        return status == 0

    def _require(argv, what):
        status, _out, stderr = run(argv)
        if status != 0:
            raise RuntimeError(f"{what} failed: {stderr}")

    if not _exists("table"):
        _require(["nft", "add", "table"] + scope, "nft add table")

    # Per-IP regular chain (no hook, no 'type filter').
    if not _exists("chain", PERIP_CHAIN):
        _require(["nft", "add", "chain"] + scope + [PERIP_CHAIN, "{", "}"],
                 f"nft add chain {PERIP_CHAIN}")

    # Set used for the collective drop rule.
    if not _exists("set", NFT_SET):
        _require(["nft", "add", "set"] + scope
                 + [NFT_SET, "{", "type", "ipv4_addr", ";", "flags", "interval", ";", "}"],
                 "nft add set")

    # Base chain with the input hook.
    if not _exists("chain", NFT_CHAIN):
        _require(["nft", "add", "chain"] + scope
                 + [NFT_CHAIN, "{", "type", "filter", "hook", "input", "priority", "-5", ";",
                    "policy", "accept", ";", "}"],
                 f"nft add chain {NFT_CHAIN}")

    # Rebuild the base chain so rule order is guaranteed; the flush result is ignored.
    run(["nft", "flush", "chain"] + scope + [NFT_CHAIN])
    _require(["nft", "add", "rule"] + scope + [NFT_CHAIN, "jump", PERIP_CHAIN],
             "nft add jump rule")
    _require(["nft", "add", "rule"] + scope + [NFT_CHAIN, "ip", "saddr", "@" + NFT_SET, "drop"],
             "nft add set-drop rule")

def nft_list_set_ips():
    """Return every dotted-quad address found in the listing of the nft block set.

    An empty set is returned when the ``nft list set`` command fails.
    """
    status, listing, _err = run(["nft","list","set",NFT_FAMILY,NFT_TABLE,NFT_SET])
    if status != 0:
        return set()
    dotted_quads = re.findall(r'(\d+\.\d+\.\d+\.\d+)', listing)
    return set(dotted_quads)

def nft_add_ip(ip):
    """Add *ip* to the nft block set and record it as blocked via nft.

    Raises:
        RuntimeError: if nft rejects the element for any reason other than
            it already existing (or if the infrastructure cannot be set up).
    """
    ensure_nft_infra()
    status, _out, stderr = run(["nft","add","element",NFT_FAMILY,NFT_TABLE,NFT_SET,"{",ip,"}"])
    already_present = "exists" in stderr.lower()
    if status != 0 and not already_present:
        raise RuntimeError(stderr)
    blocked_ips.add(ip)
    blocked_backend[ip] = "nft"


def nft_add_perip_rule(ip):
    """Install a per-IP drop rule for *ip* in PERIP_CHAIN, preferring one with a counter.

    Several rule spellings are tried in order and the first one nft accepts
    wins. If all of them fail only a warning is printed; nothing is raised
    here (``ensure_nft_infra`` may still raise).
    """
    ensure_nft_infra()

    command_head = ["nft", "add", "rule", NFT_FAMILY, NFT_TABLE, PERIP_CHAIN]
    matcher = ["ip", "saddr", ip]
    rule_endings = (
        # plain counter + drop, no comment
        ["counter", "drop"],
        # comment before / after the counter (not accepted by every nft build)
        ["comment", IPT_COMMENT, "counter", "drop"],
        ["counter", "comment", IPT_COMMENT, "drop"],
        # last resort: drop without a counter
        ["drop"],
    )

    failure = None
    for ending in rule_endings:
        status, stdout, stderr = run(command_head + matcher + ending)
        if status == 0:
            return
        failure = stderr or stdout or f"rc={status}"
        if failure and "exists" in failure.lower():
            # An already existing rule counts as success.
            return

    # Every variant was rejected: warn only.
    print(f"[WARN] per-ip rule add failed for {ip}: {failure}")


def poll_firewall_counters():
    """Refresh ``fw_hits`` with the packet counters of this tool's drop rules.

    Counters are read from the nft per-IP chain and from iptables INPUT rules
    carrying IPT_COMMENT. Each backend is queried only if its binary is
    installed, so a host with just one of them still gets its counters.
    Entries of IPs that no longer appear in the firewall output are kept.
    """
    hits = {}

    if have_cmd("nft"):
        rc, out, _ = run(["nft", "list", "chain", NFT_FAMILY, NFT_TABLE, PERIP_CHAIN])
        if rc == 0 and out:
            for ip, pkts in re.findall(r'ip saddr (\d+\.\d+\.\d+\.\d+).*?counter packets (\d+)', out):
                hits[ip] = int(pkts)  # the group is \d+, so int() cannot fail

    if have_cmd("iptables"):
        # -x prints exact counters; without it large values are abbreviated ("12K").
        rc, out, _ = run(["iptables", "-L", "INPUT", "-n", "-v", "-x", "--line-numbers"])
        if rc == 0 and out:
            for line in out.splitlines():
                if IPT_COMMENT not in line or "DROP" not in line:
                    continue
                parts = line.split()
                # Columns: num pkts bytes target prot opt in out source destination ...
                # The line ends with the "/* comment */" tokens, so the source
                # must be taken by column position, not from the end.
                if len(parts) < 10:
                    continue
                try:
                    pkts = int(parts[1])
                except ValueError:
                    continue
                src = parts[8]
                if re.match(r'^\d+\.\d+\.\d+\.\d+$', src):
                    hits[src] = pkts

    fw_hits.update(hits)

def ipt_add_ip(ip):
    """Ensure an iptables INPUT drop rule tagged with IPT_COMMENT exists for *ip*.

    If the rule is missing it is inserted at position 1. On success the IP is
    recorded as blocked via iptables.

    Raises:
        RuntimeError: if inserting the rule fails.
    """
    rule_spec = ["-s",ip,"-m","comment","--comment",IPT_COMMENT,"-j","DROP"]
    check_status, _out, _err = run(["iptables","-C","INPUT"] + rule_spec)
    if check_status != 0:
        insert_status, _out, stderr = run(["iptables","-I","INPUT","1"] + rule_spec)
        if insert_status != 0:
            raise RuntimeError(stderr)
    blocked_ips.add(ip)
    blocked_backend[ip] = "iptables"

def block_ip_immediate(ip, do_block):
    """Block *ip* now, trying nft first and iptables second.

    Returns True if the IP was blocked (or would have been, in dry-run mode
    when *do_block* is false) and False if it was already blocked or every
    backend failed.
    """
    if ip in blocked_ips:
        return False

    if not do_block:
        print(f"[DRY-RUN] Would block {ip}")
        return True

    if have_cmd("nft"):
        try:
            nft_add_ip(ip)
            nft_add_perip_rule(ip)
            print(f"[BLOCKED nft] {ip}")
            return True
        except Exception as nft_error:
            print(f"[nft add failed] {nft_error}; trying iptables...")

    if have_cmd("iptables"):
        try:
            ipt_add_ip(ip)
            print(f"[BLOCKED iptables] {ip}")
            return True
        except Exception as ipt_error:
            print(f"[iptables add failed] {ipt_error}")

    print(f"[BLOCK FAILED] {ip}")
    return False

# ---------- processing ----------
def should_include_entry(parsed, cutoff_dt):
    """Tell whether a parsed entry passes the optional time cutoff.

    Without a cutoff everything passes. With one, entries that have no
    timestamp are rejected and the rest must be at or after the cutoff.
    """
    if cutoff_dt is None:
        return True
    stamp = parsed.get("ts")
    if stamp is None:
        return False
    return stamp >= cutoff_dt

def process_parsed(parsed):
    """Fold one parsed log entry into the per-IP statistics and run the per-request checks.

    Updates ``ip_stats`` (counters, timestamps, paths, statuses, user agents,
    flags), increments ``processed_entries`` and calls ``flag()`` for every
    heuristic that fires. In "ignore" whitelist mode, entries from whitelisted
    IPs are dropped without being counted.

    Args:
        parsed: dict as returned by ``parse_line``.
    """
    global processed_entries
    ip = parsed["ip"]
    if WHITELIST_MODE == "ignore" and ip_is_whitelisted(ip):
        return

    st = ip_stats[ip]
    st["count"] += 1
    ts = parsed["ts"]
    if ts:
        if st["first_ts"] is None or ts < st["first_ts"]:
            st["first_ts"] = ts
        if st["last_ts"] is None or ts > st["last_ts"]:
            st["last_ts"] = ts

    uri = parsed["uri"]
    path = uri.split("?", 1)[0]
    st["paths"][path] += 1
    if path.lower().endswith(".php"):
        st["unique_php"].add(path.lower())
    st["statuses"][parsed["status"]] += 1
    if parsed["agent"]:
        st["uas"][parsed["agent"]] += 1
    st["req_lines"].append(parsed)
    processed_entries += 1

    # --- SQL injection indicators ---
    # Two views of the URI are needed. normalize_for_sqli() replaces every
    # punctuation character with a space, which suits keyword checks but makes
    # "(" and "=" unmatchable. Those checks therefore run on text that is only
    # decoded, comment-stripped and lowercased.
    uri_words = normalize_for_sqli(uri)
    uri_decoded = strip_sql_comments(url_unquote_safe(uri)).lower()

    sqli = bool(
        re.search(r"\bselect\b.+\bfrom\b", uri_words)
        or re.search(r"\bor\s+1\s*=\s*1\b", uri_decoded)
        or "sleep(" in uri_decoded
        or "benchmark(" in uri_decoded
        or "chr(" in uri_decoded
        or "chr " in uri_words
        or "sysibm sysdummy1" in uri_words
        # also covers "raise_error": underscores become spaces in uri_words
        or "raise error" in uri_words
    )
    if sqli:
        st["flags"]["sqli"] += 1
        flag(ip, "Possible SQLi attempt", uri)

    # --- other per-request heuristics ---
    if LFI_RFI_RE.search(uri):
        st["flags"]["lfi_rfi"] += 1
        flag(ip, "Possible LFI/RFI attempt", uri)
    if SENSITIVE_FILES_RE.search(uri):
        st["flags"]["sensitive"] += 1
        flag(ip, "Access to sensitive filename", uri)
    if NON_PRINTABLE_RE.search(parsed["raw_req"]):
        st["flags"]["non_print"] += 1
        flag(ip, "Non-printable bytes in request", parsed["raw_req"])
    if len(uri) > LONG_URL_LENGTH:
        st["flags"]["long_uri"] += 1
        flag(ip, "Very long URI", uri[:200] + "...")
    # A literal "\x" escape in the logged line, or a raw 0x16 byte (the first
    # byte of a TLS handshake record), indicates binary data on the HTTP port.
    if "\\x" in parsed["raw_line"] or "\x16" in parsed["raw_line"]:
        st["flags"]["binary"] += 1
        flag(ip, "Binary/TLS handshake on HTTP port", parsed["raw_line"])

# ---------- status page ----------
def _reason_tag(lbl, title, cls="tag-blue"):
    return f"<span class='tag {cls}' title='{title}'><b>{lbl}</b></span>"

def render_reason_tags(reasons):
    """Render score reasons ("key=value" strings) as a row of HTML tags.

    Known keys get a short label, a descriptive tooltip and a fixed colour;
    unknown keys are shown in gray with the key shortened. An empty list
    renders a single gray "-" tag.
    """
    # key -> (label template, tooltip template, css class)
    known = {
        "many_php_files": ("PHP{v}", "Many distinct PHP files: {v}", "tag-red"),
        "rps": ("RPS {v}", "High request rate (req/s): {v}", "tag-purple"),
        "unique_uas": ("UA{v}", "Many unique User-Agents: {v}", "tag-cyan"),
        "unique_paths": ("PATHS {v}", "Many unique paths requested: {v}", "tag-blue"),
        "auth_failures": ("AUTH {v}", "Auth failures (401/403): {v}", "tag-green"),
        "sqli": ("SQLi {v}", "SQL injection indicators: {v}", "tag-red"),
        "lfi_rfi": ("LFI/RFI {v}", "Traversal/File inclusion: {v}", "tag-amber"),
        "sensitive": ("SENS {v}", "Sensitive filenames: {v}", "tag-blue"),
        "binary": ("BIN {v}", "Binary/TLS on HTTP: {v}", "tag-purple"),
        "long_uri": ("LONG {v}", "Very long URIs: {v}", "tag-cyan"),
        "non_print": ("CTRL {v}", "Non-printable chars: {v}", "tag-gray"),
    }

    rendered = []
    for reason in reasons:
        name, _sep, value = reason.partition("=")
        name = name.strip()
        value = value.strip()

        if name == "4xx_ratio":
            # The ratio is shown as a whole percentage when it parses as a number.
            try:
                shown = f"{float(value)*100:.0f}%"
            except:
                shown = value
            rendered.append(_reason_tag(f"4xx {shown}", f"High 4xx error ratio: {value}", "tag-amber"))
        elif name in known:
            label_tpl, title_tpl, css = known[name]
            rendered.append(_reason_tag(label_tpl.format(v=value), title_tpl.format(v=value), css))
        else:
            short = (name[:6] + "…") if len(name) > 7 else name
            text = f"{short}={value}" if value else short
            rendered.append(_reason_tag(text, reason, "tag-gray"))

    if not rendered:
        return "<span class='tag tag-gray'>-</span>"
    return "".join(rendered)

def _flag_tag(reason_text):
    """Map a flag's reason text to a short coloured tag by case-insensitive keyword match.

    Rules are checked in order and the first hit wins; unmatched text yields a
    gray "FLAG" tag whose tooltip is the reason itself.
    """
    lowered = (reason_text or "").lower()
    rules = (
        (("sqli",), ("SQLi", "Possible SQL injection", "tag-red")),
        (("lfi", "rfi"), ("LFI/RFI", "File inclusion / traversal", "tag-amber")),
        (("sensitive",), ("SENS", "Sensitive filename", "tag-blue")),
        (("binary", "tls"), ("BIN", "Binary/TLS on HTTP", "tag-purple")),
        (("long uri",), ("LONG", "Very long URI", "tag-cyan")),
        (("non-printable",), ("CTRL", "Non-printable bytes", "tag-gray")),
    )
    for needles, tag_args in rules:
        if any(needle in lowered for needle in needles):
            return _reason_tag(*tag_args)
    return _reason_tag("FLAG", reason_text or "Flagged", "tag-gray")

def render_status_html(now_ts, threshold, wl_desc):
    """Render the complete status page as an HTML string.

    Scored IPs are split into three tables: blocked, "yellow" (score within one
    point below the threshold, up to the threshold itself) and observed (the
    top 30 of the rest). The last 50 flagged events are listed below them.

    Values that come from the access log (IP/host field, request details) are
    HTML-escaped before insertion: the request URI is attacker-controlled and
    would otherwise allow script injection into the page.

    Args:
        now_ts: preformatted "updated at" timestamp string.
        threshold: blocking score threshold (an IP is blocked when score > threshold).
        wl_desc: human-readable whitelist summary.

    Returns:
        The HTML document as a string.
    """
    # Imported by name so the module-level namespace stays untouched.
    from html import escape

    scores = compute_scores()
    blocked = []
    yellow = []
    observed = []
    for ip, (score, cnt, reasons) in scores.items():
        st = ip_stats[ip]
        row = {"ip": ip, "score": score, "reqs": cnt, "reasons": reasons,
               "first": ts_to_str(st["first_ts"]), "last": ts_to_str(st["last_ts"]),
               "backend": blocked_backend.get(ip, "n/a") if ip in blocked_ips else "n/a"}
        if ip in blocked_ips:
            blocked.append(row)
        elif score >= max(0, threshold - 1) and score <= threshold:
            yellow.append(row)
        else:
            observed.append(row)

    # Highest score first, then most requests, then IP for a stable order.
    keyf = lambda r: (-r["score"], -r["reqs"], r["ip"])
    blocked.sort(key=keyf)
    yellow.sort(key=keyf)
    observed.sort(key=keyf)
    recent = suspicious_events[-50:]
    refresh = STATUS_INTERVAL

    blocked_rows = (
        "".join(
            f"<tr><td><code>{escape(str(r['ip']))}</code></td>"
            f"<td class='red'><b>{r['score']}</b></td>"
            f"<td>{r['reqs']}</td>"
            f"<td>{fw_hits.get(r['ip'],0)}</td>"
            f"<td>{render_reason_tags(r['reasons'])}</td>"
            f"<td>{escape(str(r['backend']))}</td>"
            f"<td>{r['first']}</td>"
            f"<td>{r['last']}</td></tr>"
            for r in blocked
        ) or "<tr><td colspan='8' class='small'>None</td></tr>"
    )

    yellow_rows = (
        "".join(
            f"<tr><td><code>{escape(str(r['ip']))}</code></td>"
            f"<td class='amber'><b>{r['score']}</b></td>"
            f"<td>{r['reqs']}</td>"
            f"<td>{render_reason_tags(r['reasons'])}</td>"
            f"<td>{r['first']}</td>"
            f"<td>{r['last']}</td></tr>"
            for r in yellow
        ) or "<tr><td colspan='6' class='small'>None</td></tr>"
    )

    observed_rows = (
        "".join(
            f"<tr><td><code>{escape(str(r['ip']))}</code></td>"
            f"<td>{r['score']}</td>"
            f"<td>{r['reqs']}</td>"
            f"<td>{render_reason_tags(r['reasons'])}</td>"
            f"<td>{r['first']}</td>"
            f"<td>{r['last']}</td></tr>"
            for r in observed[:30]
        ) or "<tr><td colspan='6' class='small'>None</td></tr>"
    )

    # Numbering continues from the position of the first shown event in the full list.
    # The detail is truncated before escaping so that no entity is cut in half.
    first_index = max(1, len(suspicious_events) - len(recent) + 1)
    recent_rows = (
        "".join(
            f"<tr><td>{i}</td><td><code>{escape(str(ip))}</code></td><td>{_flag_tag(reason)}</td>"
            f"<td><code>{escape(str(detail or '')[:160])}</code></td><td>{escape(str(ts))}</td></tr>"
            for i, (ip, reason, detail, ts) in enumerate(recent, start=first_index)
        ) or "<tr><td colspan='5' class='small'>No recent flags.</td></tr>"
    )

    page = f"""<!doctype html><html lang="en"><head>
<meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1">
<title>Security Scanner — Live Status</title>
<meta http-equiv="refresh" content="{refresh}">
<style>
body{{margin:0;background:#0b0f14;color:#e6edf3;font:15px/1.6 system-ui,-apple-system,Segoe UI,Roboto}}
.container{{max-width:1200px;margin:28px auto;padding:16px}}
.header{{display:flex;justify-content:space-between;align-items:center;gap:12px;flex-wrap:wrap}}
h1{{margin:0;font-size:22px}}.muted{{color:#9aa7b6;font-size:13px}}
.panel{{background:#0f1720;border:1px solid #223243;border-radius:12px;padding:14px;margin-top:14px}}
.kpis{{display:flex;gap:12px;flex-wrap:wrap}}.kpi{{flex:1;min-width:160px;padding:12px;background:rgba(255,255,255,.03);border-radius:10px}}
table{{width:100%;border-collapse:collapse}}th,td{{padding:8px 10px;border-bottom:1px solid #223243;text-align:left}}
th{{background:rgba(255,255,255,.05)}}
.tag{{display:inline-block;padding:3px 8px;border-radius:999px;font-size:12px;margin:2px 6px 2px 0}}
.tag-red{{background:rgba(255,107,107,.08);color:#ff9b9b}}
.tag-amber{{background:rgba(245,185,66,.10);color:#ffd27f}}
.tag-blue{{background:rgba(138,180,255,.10);color:#8ab4ff}}
.tag-cyan{{background:rgba(158,240,240,.10);color:#9ef0f0}}
.tag-purple{{background:rgba(180,138,255,.10);color:#d6b3ff}}
.tag-green{{background:rgba(126,231,135,.10);color:#7ee787}}
.tag-gray{{background:rgba(255,255,255,.05);color:#c8d1db}}
.small{{color:#9aa7b6;font-size:13px}}
.red{{color:#ff6b6b}}
.amber{{color:#f5b942}}
</style></head><body><div class="container">
<div class="header"><h1>Security Scanner — Live Status</h1><div class="muted">Updated: {escape(str(now_ts))} • Refresh: {refresh}s • Whitelist: {escape(str(wl_desc))}</div></div>

<div class="kpis">
  <div class="kpi"><div style="font-size:20px">{len(blocked)}</div><div class="small">Blocked IPs</div></div>
  <div class="kpi"><div style="font-size:20px">{len(yellow)}</div><div class="small">Yellow (≤ threshold)</div></div>
  <div class="kpi"><div style="font-size:20px">{len(observed)}</div><div class="small">Observed</div></div>
  <div class="kpi"><div style="font-size:20px">{processed_entries}</div><div class="small">Processed entries</div></div>
</div>

<div class="panel">
  <h3 class="small">Blocked</h3>
  <table><thead><tr><th>IP</th><th>Score</th><th>Reqs</th><th>Hits</th><th>Reasons</th><th>Backend</th><th>First</th><th>Last</th></tr></thead>
  <tbody>{blocked_rows}</tbody></table>
</div>

<div class="panel">
  <h3 class="small">Yellow</h3>
  <table><thead><tr><th>IP</th><th>Score</th><th>Reqs</th><th>Reasons</th><th>First</th><th>Last</th></tr></thead>
  <tbody>{yellow_rows}</tbody></table>
</div>

<div class="panel">
  <h3 class="small">Top observed</h3>
  <table><thead><tr><th>IP</th><th>Score</th><th>Reqs</th><th>Reasons</th><th>First</th><th>Last</th></tr></thead>
  <tbody>{observed_rows}</tbody></table>
</div>

<div class="panel">
  <h3 class="small">Recent flags</h3>
  <table><thead><tr><th>#</th><th>IP</th><th>Reason</th><th>Detail</th><th>When</th></tr></thead>
  <tbody>{recent_rows}</tbody></table>
</div>

<div class="small muted">Threshold: block if <b>score &gt; {threshold}</b> • Status interval: {STATUS_INTERVAL}s</div>
</div></body></html>"""
    return page

def write_status_page(threshold, wl_desc):
    """Render the status page, write it to STATUS_PAGE_PATH and record the write time.

    The parent directory is created if needed. A path without a directory
    component (a bare file name) is written to the current directory;
    ``os.makedirs("")`` would raise for it, so directory creation is skipped.

    Args:
        threshold: blocking score threshold shown on the page.
        wl_desc: human-readable whitelist summary shown on the page.
    """
    global _last_status_write
    now = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")
    html = render_status_html(now, threshold, wl_desc)
    target_dir = os.path.dirname(STATUS_PAGE_PATH)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(STATUS_PAGE_PATH, "w", encoding="utf-8") as f:
        f.write(html)
    _last_status_write = time.time()
    print(f"[STATUS] wrote {STATUS_PAGE_PATH}")

def maybe_update_status(threshold, wl_desc, force=False):
    """Rewrite the status page when forced or when the refresh interval has elapsed.

    The effective interval is never shorter than 5 seconds.
    """
    if not force:
        elapsed = time.time() - _last_status_write
        if elapsed < max(5, STATUS_INTERVAL):
            return
    write_status_page(threshold, wl_desc)

# ---------- scan ----------
def scan_files(paths, cutoff_dt=None):
    """Scan every log file matching the glob patterns in *paths*.

    Each readable line is counted; parsable entries that pass the optional
    *cutoff_dt* are handed to ``process_parsed``. Unparsable lines containing
    control characters are flagged under the pseudo-IP "UNKNOWN". An error
    while reading one file is reported on stderr and the scan moves on to the
    next file.
    """
    global total_lines_attempted

    matches = []
    for pattern in paths:
        matches.extend(sorted(glob.glob(pattern)))
    log_files = [candidate for candidate in matches if os.path.isfile(candidate)]

    if not log_files:
        print("No files found for:", paths, file=sys.stderr)
        return

    print(f"[SCAN] {len(log_files)} file(s)")
    for log_file in log_files:
        print(" -", log_file)
    if cutoff_dt:
        print("[SCAN] only entries newer than:", cutoff_dt.isoformat())
        print()

    for log_file in log_files:
        try:
            with open_log(log_file) as handle:
                for text_line in handle:
                    total_lines_attempted += 1
                    entry = parse_line(text_line)
                    if entry:
                        if should_include_entry(entry, cutoff_dt):
                            process_parsed(entry)
                    elif NON_PRINTABLE_RE.search(text_line):
                        flag("UNKNOWN","Non-printable bytes in line",text_line.strip())
        except Exception as read_error:
            print(f"[ERROR] reading {log_file}: {read_error}", file=sys.stderr)

# ---------- live ----------
def follow_and_live(tail_file, cutoff_dt, do_block, score_threshold,
                    rate_limit, rate_window, block_on_rate,
                    block_on_first_sqli, block_on_sensitive):
    """Follow *tail_file* like ``tail -f`` and block offending IPs as entries arrive.

    Reading starts at the current end of the file. Every new entry is
    processed and scored; the blocking triggers are then checked in this
    order: first SQLi flag, first sensitive-file flag, rate limit, score above
    threshold. Firewall counters and the status page are refreshed
    periodically. The loop runs until interrupted with Ctrl-C or until an
    unexpected exception occurs (which is reported on stderr).

    Args:
        tail_file: path of the log file to follow.
        cutoff_dt: optional minimum timestamp; older entries are skipped.
        do_block: if false, blocks are only announced (dry run).
        score_threshold: an IP is blocked when its score is greater than this.
        rate_limit: allowed requests per second inside the sliding window.
        rate_window: length of the sliding window in seconds.
        block_on_rate: enable blocking when the rate limit is exceeded.
        block_on_first_sqli: block an IP as soon as it has one SQLi flag.
        block_on_sensitive: block an IP as soon as it has one sensitive-file flag.
    """
    global _last_fw_poll  # assigned below, so it must be declared before first use

    # Seed the in-memory block list from whatever the nft set already contains.
    if have_cmd("nft"):
        try:
            existing = nft_list_set_ips()
            blocked_ips.update(existing)
            for ip in existing:
                blocked_backend[ip] = "nft"
        except Exception:
            pass

    wl_desc = f"{WHITELIST_MODE} ({len(_whitelist_ips)} IPs, {len(_whitelist_networks)} nets)"
    print(f"[LIVE] following {tail_file} ... | whitelist: {wl_desc}")
    maybe_update_status(score_threshold, wl_desc, force=True)

    try:
        with open(tail_file, "r", encoding="utf-8", errors="replace") as fh:
            # Only lines appended from now on are processed.
            fh.seek(0, os.SEEK_END)
            while True:
                line = fh.readline()
                if not line:
                    # Idle: no new data. Poll firewall counters and refresh the status page.
                    nowt = time.time()
                    if nowt - _last_fw_poll >= FW_POLL_INTERVAL:
                        try:
                            poll_firewall_counters()
                        except Exception as e:
                            print(f"[FW POLL WARN] {e}")
                        _last_fw_poll = nowt
                    time.sleep(0.25)
                    maybe_update_status(score_threshold, wl_desc, force=False)
                    continue

                parsed = parse_line(line)
                if not parsed:
                    # Unparsable line: only worth a flag if it contains control characters.
                    if NON_PRINTABLE_RE.search(line):
                        flag("UNKNOWN", "Non-printable bytes in line", line.strip())
                        maybe_update_status(score_threshold, wl_desc, force=False)
                    continue
                if not should_include_entry(parsed, cutoff_dt):
                    continue

                ip = parsed["ip"]
                if WHITELIST_MODE == "ignore" and ip_is_whitelisted(ip):
                    continue

                process_parsed(parsed)

                # Sliding window keyed on arrival time (wall clock), not on the log timestamp.
                now = time.time()
                dq = ip_recent[ip]
                dq.append(now)
                cutoff = now - rate_window
                while dq and dq[0] < cutoff:
                    dq.popleft()
                current_rps = len(dq) / rate_window if rate_window > 0 else 0.0

                score, reasons = compute_ip_score(ip)
                print(f"[LIVE] {ip}  reqs={ip_stats[ip]['count']}  score={score}  rps_win={current_rps:.2f}  reasons={'|'.join(reasons) if reasons else '-'}")

                # Immediate blocks. After a successful block the remaining checks
                # for this line are skipped via `continue`.
                if block_on_first_sqli and ip_stats[ip]["flags"].get("sqli", 0) >= 1:
                    if (not ip_is_whitelisted(ip)) and ip not in blocked_ips:
                        if block_ip_immediate(ip, do_block):
                            maybe_update_status(score_threshold, wl_desc, force=True)
                            continue
                if block_on_sensitive and ip_stats[ip]["flags"].get("sensitive", 0) >= 1:
                    if (not ip_is_whitelisted(ip)) and ip not in blocked_ips:
                        if block_ip_immediate(ip, do_block):
                            maybe_update_status(score_threshold, wl_desc, force=True)
                            continue

                # Rate limit: more than rate_limit * rate_window requests inside the window.
                if block_on_rate and (len(dq) > rate_limit * rate_window) and ip not in blocked_ips and not ip_is_whitelisted(ip):
                    flag(ip, "Rate limit exceeded", f"{len(dq)} req in {rate_window}s (~{current_rps:.2f} rps)")
                    if block_ip_immediate(ip, do_block):
                        maybe_update_status(score_threshold, wl_desc, force=True)
                        continue

                # Score fallback: block once the accumulated score exceeds the threshold.
                if score > score_threshold and ip not in blocked_ips and not ip_is_whitelisted(ip):
                    if block_ip_immediate(ip, do_block):
                        maybe_update_status(score_threshold, wl_desc, force=True)
                        continue

                # Busy path: the idle branch may not run for a while, so poll here as well.
                nowt = time.time()
                if nowt - _last_fw_poll >= FW_POLL_INTERVAL:
                    try:
                        poll_firewall_counters()
                    except Exception as e:
                        print(f"[FW POLL WARN] {e}")
                    _last_fw_poll = nowt

                maybe_update_status(score_threshold, wl_desc, force=False)

    except KeyboardInterrupt:
        print("\n[LIVE] stopped by user.")
    except Exception as e:
        print(f"[LIVE ERROR] {e}", file=sys.stderr)

# ---------- report ----------
def analyze_and_report(out_json, score_threshold, do_block=False, unblock=False):
    """Print a summary, write the JSON report and optionally (un)block IPs.

    Args:
        out_json: Path of the JSON report file to write (UTF-8).
        score_threshold: IPs whose score is strictly greater than this value
            are batch-block candidates; also forwarded to the status update.
        do_block: If true (and ``unblock`` is false), block every candidate IP
            that is neither whitelisted nor already in ``blocked_ips``.
        unblock: If true, remove blocks instead of adding them: flush the nft
            set and delete the iptables INPUT rules tagged with
            ``IPT_COMMENT``. Takes precedence over ``do_block``.
    """
    # Local import: only needed to tokenise iptables-save output below.
    import shlex

    wl_desc = f"{WHITELIST_MODE} ({len(_whitelist_ips)} IPs, {len(_whitelist_networks)} nets)"
    ip_scores = compute_scores()
    # Values are (score, request_count, reasons); rank by score, then by
    # request count, both descending. Computed once, used for both the
    # console summary and the JSON report.
    ranked = sorted(ip_scores.items(), key=lambda kv: (-kv[1][0], -kv[1][1]))

    print("\n==== Summary ====")
    print(f"Total log lines parsed: {total_lines_attempted}")
    print(f"Processed entries:      {processed_entries}")
    print(f"Scored IPs:             {len(ip_scores)}")
    for ip, (score, cnt, rs) in ranked[:50]:
        print(f" - {ip}  score={score}  reqs={cnt}  reasons={', '.join(rs)}")

    report = {
        "total_lines_attempted": total_lines_attempted,
        "processed_entries": processed_entries,
        "scored_ips": len(ip_scores),
        "whitelist": {"mode": WHITELIST_MODE, "ips": len(_whitelist_ips), "nets": len(_whitelist_networks)},
        "top_suspicious": [
            {"ip": ip, "score": v[0], "reqs": v[1], "reasons": v[2]}
            for ip, v in ranked[:200]
        ],
        # Only the most recent 500 flagged events are exported.
        "flagged_events": [
            {"ip": ip, "reason": r, "detail": d, "ts": ts}
            for (ip, r, d, ts) in suspicious_events[-500:]
        ],
    }
    with open(out_json, "w", encoding="utf-8") as rf:
        json.dump(report, rf, ensure_ascii=False, indent=2)
    print(f"[REPORT] wrote {out_json}")
    maybe_update_status(score_threshold, wl_desc, force=True)

    if unblock:
        if have_cmd("nft"):
            try:
                run(["nft", "flush", "set", NFT_FAMILY, NFT_TABLE, NFT_SET])
                blocked_ips.clear()
                blocked_backend.clear()
                print("[UNBLOCK] nft set flushed.")
            except Exception as e:
                print(f"[UNBLOCK ERROR nft] {e}")
        if have_cmd("iptables"):
            rc, out, _ = run(["iptables-save"])
            if rc == 0:
                for line in out.splitlines():
                    if IPT_COMMENT in line and "-A INPUT" in line:
                        # iptables-save double-quotes option values (such as
                        # --comment) that contain spaces or other special
                        # characters; shlex keeps each quoted value as one
                        # argv element, where str.split() would break it up.
                        try:
                            parts = shlex.split(line)
                        except ValueError as e:
                            print(f"[UNBLOCK ERROR iptables] cannot parse rule {line!r}: {e}")
                            continue
                        # Turn the saved "-A INPUT ..." append into the
                        # matching "-D INPUT ..." delete.
                        parts[0] = "-D"
                        run(["iptables"] + parts)
                print("[UNBLOCK] iptables rules removed.")
    elif do_block:
        candidates = [
            ip for ip, (score, _, _) in ip_scores.items()
            if score > score_threshold and not ip_is_whitelisted(ip)
        ]
        to_block = [ip for ip in candidates if ip not in blocked_ips]
        if to_block:
            print(f"[BATCH BLOCK] {len(to_block)} IPs")
            for ip in to_block:
                block_ip_immediate(ip, True)
            maybe_update_status(score_threshold, wl_desc, force=True)

# ---------- CLI ----------
def main():
    """Command-line entry point: parse options, scan logs, report, optionally follow live.

    Flow, as implemented below:
      1. Parse and validate CLI options and publish the status-page,
         whitelist-mode and firewall-poll settings as module globals.
      2. Load the whitelist and scan the given files/globs.
      3. ``--unblock``: write the report, remove blocks and return.
      4. ``--live``: follow the first non-gz file that matches, then write a
         final report once following ends.
      5. Otherwise: write the report (batch-blocking if ``--block`` is set).

    Exits with status 2 on invalid ``--days``, ``--rate-limit`` or
    ``--rate-window`` values.
    """
    global STATUS_PAGE_PATH, STATUS_INTERVAL, WHITELIST_MODE, FW_POLL_INTERVAL
    ap = argparse.ArgumentParser(description="Apache suspicious scanner (live blocking, whitelist, status page, rate-limit, per-IP firewall counters).")
    ap.add_argument("paths", nargs="*", help="Files/globs (e.g. /var/log/apache2/access.log*)")
    ap.add_argument("--days", type=int, default=None, help="Only consider last N days")
    ap.add_argument("--live", action="store_true", help="Follow first non-gz file and react live")
    ap.add_argument("--out", default="apache_suspicious_report.json", help="Output JSON file")
    ap.add_argument("--block", action="store_true", help="Apply firewall blocks (sudo)")
    ap.add_argument("--unblock", action="store_true", help="Remove previously applied blocks")
    ap.add_argument("--score-threshold", type=int, default=5, help="Block if score > threshold")
    ap.add_argument("--whitelist", type=str, default=None, help="Whitelist file path")
    ap.add_argument("--whitelist-mode", choices=["block-only","ignore"], default="block-only",
                    help="block-only: never block WL IPs; ignore: also exclude from scoring/views")
    ap.add_argument("--status-page", type=str, default=DEFAULT_STATUS_PAGE, help="HTML status page path")
    ap.add_argument("--status-interval", type=int, default=DEFAULT_STATUS_INTERVAL, help="Seconds between status updates (min 5)")
    ap.add_argument("--block-on-first-sqli", action="store_true", help="Immediately block on first SQLi indicator")
    ap.add_argument("--block-on-sensitive", action="store_true", help="Immediately block on first sensitive filename access (e.g., .env)")
    ap.add_argument("--rate-limit", type=float, default=2.0, help="Req/s threshold for auto-block (windowed)")
    ap.add_argument("--rate-window", type=int, default=15, help="Window in seconds for rate calculation")
    ap.add_argument("--block-on-rate", dest="block_on_rate", action="store_true")
    ap.add_argument("--no-block-on-rate", dest="block_on_rate", action="store_false")
    ap.set_defaults(block_on_rate=True)
    ap.add_argument("--fw-poll-interval", type=int, default=60, help="Seconds between firewall counter polls")

    args = ap.parse_args()

    # The live loop blocks once the number of requests in the window exceeds
    # rate_limit * rate_window. A zero or negative value would make that
    # threshold <= 0 and block every client on its first request.
    if args.rate_limit <= 0 or args.rate_window <= 0:
        print("--rate-limit and --rate-window must be > 0", file=sys.stderr)
        sys.exit(2)

    # Both intervals are clamped to a minimum of 5 seconds.
    STATUS_PAGE_PATH = args.status_page
    STATUS_INTERVAL  = max(5, args.status_interval)
    WHITELIST_MODE   = args.whitelist_mode
    FW_POLL_INTERVAL = max(5, args.fw_poll_interval)

    paths = args.paths or ["/var/log/apache2/access.log"]
    cutoff_dt = None
    if args.days is not None:
        if args.days < 0:
            print("--days must be >= 0", file=sys.stderr)
            sys.exit(2)
        # Timezone-aware "now" in the local zone, minus N days.
        cutoff_dt = datetime.now().astimezone() - timedelta(days=args.days)

    load_whitelist(args.whitelist)
    scan_files(paths, cutoff_dt=cutoff_dt)
    wl_desc = f"{WHITELIST_MODE} ({len(_whitelist_ips)} IPs, {len(_whitelist_networks)} nets)"
    maybe_update_status(args.score_threshold, wl_desc, force=True)

    if args.unblock:
        analyze_and_report(args.out, args.score_threshold, do_block=args.block, unblock=True)
        return

    if args.live:
        # Expand every pattern (sorted per pattern) and keep regular,
        # non-gzipped files; the first one is the file to follow.
        expanded = []
        for pattern in paths:
            expanded.extend(sorted(glob.glob(pattern)))
        tail = [p for p in expanded if os.path.isfile(p) and not p.endswith(".gz")]
        if not tail:
            print("No non-gz logfile to follow found.", file=sys.stderr)
            analyze_and_report(args.out, args.score_threshold, do_block=args.block, unblock=False)
            return
        tail_file = tail[0]
        # With --block, batch-block from the initial scan before going live.
        if args.block:
            analyze_and_report(args.out, args.score_threshold, do_block=True, unblock=False)
        follow_and_live(tail_file, cutoff_dt, args.block, args.score_threshold,
                        args.rate_limit, args.rate_window, args.block_on_rate,
                        args.block_on_first_sqli, args.block_on_sensitive)
        # Final report after following ends; no further batch blocking here.
        analyze_and_report(args.out, args.score_threshold, do_block=False, unblock=False)
        return

    analyze_and_report(args.out, args.score_threshold, do_block=args.block, unblock=False)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__=="__main__":
    main()
