ccp-backups folder missing in NSX-T backup

If you, like me, tried to clean up the backups in NSX-T and ran into the error Cleanup script works only in folders, that contains subfolders "cluster-node-backups", "ccp-backups" and "inventory-summary", this post is for you.

I was trying to clean up the backups before moving to the next major release of NSX, and I kept getting an error running the nsx_backup_cleaner.py script.

It would seem that the ccp-backups folder has been removed from the backup job, so it simply doesn’t exist. VMware did fix the script with the 3.2 release.

If anything is preventing you from getting to 3.2, here is the updated script:

#!/usr/bin/env python
# ***************************************************************************
# Copyright 2020-2021 VMware, Inc.  All rights reserved. VMware Confidential.
# ***************************************************************************
# The purpose of this script is to remove old NSX backup files. Typically, this script
# will be placed on the SFTP server where the NSX Manager is uploading backup files,
# and included into a scheduler, for example cron.  Before running this script, you
# should update the BACKUP_ROOT variable.  This script works on Linux and Windows with
# both Python 2 and Python 3.
#
# On Linux SFTP server:
# You can add this script in the crontab to automatically run this script once daily
# Edit the anacron at /etc/cron.d or use crontab -e and add following line to execute the script at 10am everyday
# 00 10 * * * /sbin/nsx_backup_cleaner.py
#
# On Windows SFTP server:
# schtasks /Create /SC DAILY /TN PythonTask /TR "PATH_TO_PYTHON_EXE PATH_TO_PYTHON_SCRIPT"
# or you can add the same in TaskScheduler



from stat import S_ISREG, ST_ATIME, ST_CTIME, ST_MODE, S_ISDIR, S_IWUSR
import os, sys, time, datetime, shutil, getopt

def delete_files(delete_path_list, count):
    deleted_files = []

    for file in delete_path_list:
        for root, dirs, files in os.walk(file):
            for fname in files:
                full_path = os.path.join(root, fname)
                os.chmod(full_path, S_IWUSR)
        if count > 0:
            deleted_files.append(file)
            if os.path.isdir(file):
                shutil.rmtree(file)
            else:
                os.remove(file)
            count = count - 1
    return deleted_files


def delete_old_backup_enteries(folder, keep_days, min_count):
    keep_files = []
    for elem in os.listdir(folder):
        paths_sorted = []
        entries1 = (os.path.join(folder, elem, fn) for fn in os.listdir(os.path.join(folder, elem)))
        entries2 = ((os.stat(path), path) for path in entries1)
        entries3 = ((stat[ST_CTIME], path) for stat, path in entries2)
        for cdate, path in sorted(entries3):
            paths_sorted.append(path)

        if (len(paths_sorted) <= min_count):
            for file in paths_sorted:
                keep_files.append(file)
            continue

        delete_path_list = []
        for path in paths_sorted:
            file_create_time = os.path.getmtime(path)
            time_now = time.time()
            if ((time_now - file_create_time) > (keep_days * 24 * 60 * 60)):
                delete_path_list.append(path)

        deleted_files = delete_files(delete_path_list, min(len(delete_path_list), len(paths_sorted) - min_count))
        for file in deleted_files:
            paths_sorted.remove(file)

        for file in paths_sorted:
            keep_files.append(file)

    print(("Keeping the following backup files for folder %s" % folder))
    for file in keep_files:
        print(file)

def usage():
    print("""\
    Usage: nsx_backup_cleaner.py -d backup_dir [-k 1] [-l 5] [-h]
           Or
           nsx_backup_cleaner.py --dir backup_dir [--retention-period 1] [--min-count 5] [--help]

           Required
               -d/--dir: Backup root directory
               -k/--retention-period: Number of days need to retain a backup file
           Optional
               -l/--min-count: Minimum number of backup files to be kept, default value is 100
               -h/--help: Display help message
           """)
def main():
    BACKUP_ROOT = None

    BACKUPS_KEEP_DAYS = None
    # Minimum allowed: 100
    BACKUPS_MINCOUNT = 100

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:k:l:", ["dir=", "retention-period=", "min-count=", "help"])
    except getopt.GetoptError as err:
    # print help information and exit:
        print ((str(err))) # will print something like "option -a not recognized"
        usage()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-d", "--dir"):
            BACKUP_ROOT = arg
        elif opt in("-k", "--retention-period"):
            BACKUPS_KEEP_DAYS = int(arg)
        elif opt in ("-l", "--min-count"):
            BACKUPS_MINCOUNT = int(arg)
        else:
            usage()
            sys.exit()

    if (BACKUP_ROOT == None):
        print("Missing Backup Root")
        usage()
        sys.exit()

    if (BACKUPS_KEEP_DAYS == None):
        print("Missing Backup Retention Period in number of days")
        usage()
        sys.exit()

    if (not os.path.isdir(BACKUP_ROOT)):
        print("Wrong backup root directory")
        usage()
        sys.exit()

    backup_dirs = os.listdir(BACKUP_ROOT)
    if (all(elem in ["cluster-node-backups", "inventory-summary"] for elem in backup_dirs)):
        for elem in backup_dirs:
            if (elem in ["cluster-node-backups"]):
                delete_old_backup_enteries(os.path.join(BACKUP_ROOT, 'cluster-node-backups'), BACKUPS_KEEP_DAYS, BACKUPS_MINCOUNT)
            if (elem in ["inventory-summary"]):
                delete_old_backup_enteries(os.path.join(BACKUP_ROOT, 'inventory-summary'), BACKUPS_KEEP_DAYS, BACKUPS_MINCOUNT)
    else:
        print ("Cleanup script works only in folders, that contains subfolders \"cluster-node-backups\" and \"inventory-summary\"")


if __name__ == "__main__":
    main()

Additionally, for the script to work, make sure there are no other folders within the backup folder. The backup folder should only contain the cluster-node-backups and inventory-summary directories.
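
For reference, here is an example of how the script can be invoked, based on the usage text it prints. The backup directory is a placeholder for your own SFTP backup root, and the retention values are just an illustration:

python3 nsx_backup_cleaner.py -d /sftp/nsx/backups -k 7 -l 5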

SSC 8.8 urllib3 (1.25.11) or chardet (4.0.0) doesn’t match a supported version

I recently upgraded my LCM-deployed SSC server to 8.8. If you need a guide to go through the upgrade, you can find my other post here.

After the upgrade was completed I noticed strange behavior in the SSC UI, so I checked the status of the services. Here are the errors I found and how I fixed them.

The first step was to check the status of the service

systemctl status salt-master

The output looked like this:

* salt-master.service - The Salt Master Server
   Loaded: loaded (/lib/systemd/system/salt-master.service; enabled; vendor preset: enabled)
   Active: active (running) since Mon 2022-05-16 03:32:35 UTC; 6s ago
     Docs: man:salt-master(1)
           file:///usr/share/doc/salt/html/contents.html
           https://docs.saltproject.io/en/latest/contents.html
 Main PID: 4577 (salt-master)
    Tasks: 39 (limit: 9830)
   Memory: 335.0M
   CGroup: /system.slice/salt-master.service
           |-4577 /bin/python3 /usr/bin/salt-master
           |-4581 /bin/python3 /usr/bin/salt-master
           |-4589 /bin/python3 /usr/bin/salt-master
           |-4593 /bin/python3 /usr/bin/salt-master
           |-4602 /bin/python3 /usr/bin/salt-master
           |-4606 /bin/python3 /usr/bin/salt-master
           |-4608 /bin/python3 /usr/bin/salt-master
           |-4609 /bin/python3 /usr/bin/salt-master
           |-4616 /bin/python3 /usr/bin/salt-master
           |-4697 /bin/python3 /usr/bin/salt-master
           |-4699 /bin/python3 /usr/bin/salt-master
           |-4703 /bin/python3 /usr/bin/salt-master
           |-4711 /bin/python3 /usr/bin/salt-master
           |-4712 /bin/python3 /usr/bin/salt-master
           |-4713 /bin/python3 /usr/bin/salt-master
           |-4714 /bin/python3 /usr/bin/salt-master
           |-4715 /bin/python3 /usr/bin/salt-master
           `-4717 /bin/python3 /usr/bin/salt-master

May 16 03:32:34 ssc-01a.corp.local systemd[1]: Starting The Salt Master Server...
May 16 03:32:35 ssc-01a.corp.local salt-master[4577]: [WARNING ] /usr/lib/python3.7/site-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.25.11) or chardet (4.0.0) doesn't match a supported version!
May 16 03:32:35 ssc-01a.corp.local salt-master[4577]:   RequestsDependencyWarning)
May 16 03:32:35 ssc-01a.corp.local systemd[1]: Started The Salt Master Server.

The way I got around the error was by running:

pip3 install --upgrade requests

Alternatively, the official documentation here talks about extracting the .whl file from the My VMware Customer Connect portal here. The file we are looking for is vRA_SaltStack_Config-8.8.0.7-1_Installer.tar.gz.

Once downloaded, we are looking for SSEAPE-8.8.0.7-py2.py3-none-any.whl, found under sse-installer/salt/sse/eapi_plugin/files.
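
If you prefer to extract it on the command line, something like this should surface the file (assuming the tarball is in your current directory):

tar -xzf vRA_SaltStack_Config-8.8.0.7-1_Installer.tar.gz
ls sse-installer/salt/sse/eapi_plugin/files/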

The file needs to be uploaded to the node having the issue, and then we run:

sudo pip3 install SSEAPE-8.8.0.7-py2.py3-none-any.whl --prefix /usr 

Finally, we can restart the salt-master service and verify that it's running without errors:
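
For reference, the restart and the status check are just the standard systemctl commands:

systemctl restart salt-master
systemctl status salt-master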

* salt-master.service - The Salt Master Server
   Loaded: loaded (/lib/systemd/system/salt-master.service; enabled; vendor preset: enabled)
   Active: active (running) since Sun 2022-05-15 20:02:56 UTC; 51s ago
     Docs: man:salt-master(1)
           file:///usr/share/doc/salt/html/contents.html
           https://docs.saltproject.io/en/latest/contents.html
 Main PID: 31309 (salt-master)
    Tasks: 39 (limit: 9830)
   Memory: 330.0M
   CGroup: /system.slice/salt-master.service
           |-31309 /bin/python3 /usr/bin/salt-master
           |-31315 /bin/python3 /usr/bin/salt-master
           |-31320 /bin/python3 /usr/bin/salt-master
           |-31323 /bin/python3 /usr/bin/salt-master
           |-31325 /bin/python3 /usr/bin/salt-master
           |-31326 /bin/python3 /usr/bin/salt-master
           |-31327 /bin/python3 /usr/bin/salt-master
           |-31328 /bin/python3 /usr/bin/salt-master
           |-31330 /bin/python3 /usr/bin/salt-master
           |-31397 /bin/python3 /usr/bin/salt-master
           |-31398 /bin/python3 /usr/bin/salt-master
           |-31400 /bin/python3 /usr/bin/salt-master
           |-31411 /bin/python3 /usr/bin/salt-master
           |-31412 /bin/python3 /usr/bin/salt-master
           |-31413 /bin/python3 /usr/bin/salt-master
           |-31414 /bin/python3 /usr/bin/salt-master
           |-31415 /bin/python3 /usr/bin/salt-master
           `-31416 /bin/python3 /usr/bin/salt-master

May 15 20:02:54 ssc-01a.corp.local systemd[1]: Starting The Salt Master Server...
May 15 20:02:56 ssc-01a.corp.local systemd[1]: Started The Salt Master Server.

If the status check returns something similar to this:

sseapi_rpc_queue: could not connect to SSE server

Follow my other guide here

SSC 8.8 sseapi_rpc_queue: could not connect to SSE server

I recently upgraded my LCM-deployed SSC server to 8.8. If you need a guide to go through the upgrade, you can find my other post here.

After the upgrade was completed I noticed strange behavior in the SSC UI, so I checked the status of the services. Here are the errors I found and how I fixed them.

The first step was to check the status of the service

systemctl status salt-master

The output looked like this:

root@ssc-01a [ ~ ]# systemctl status salt-master
* salt-master.service - The Salt Master Server
   Loaded: loaded (/lib/systemd/system/salt-master.service; enabled; vendor preset: enabled)
   Active: active (running) since Sun 2022-05-15 16:39:26 UTC; 8min ago
     Docs: man:salt-master(1)
           file:///usr/share/doc/salt/html/contents.html
           https://docs.saltproject.io/en/latest/contents.html
 Main PID: 3035 (salt-master)
    Tasks: 39 (limit: 9830)
   Memory: 357.0M
   CGroup: /system.slice/salt-master.service
           |-3035 /bin/python3 /usr/bin/salt-master
           |-3041 /bin/python3 /usr/bin/salt-master
           |-3110 /bin/python3 /usr/bin/salt-master
           |-3115 /bin/python3 /usr/bin/salt-master
           |-3119 /bin/python3 /usr/bin/salt-master
           |-3122 /bin/python3 /usr/bin/salt-master
           |-3123 /bin/python3 /usr/bin/salt-master
           |-3124 /bin/python3 /usr/bin/salt-master
           |-3125 /bin/python3 /usr/bin/salt-master
           |-3203 /bin/python3 /usr/bin/salt-master
           |-3204 /bin/python3 /usr/bin/salt-master
           |-3206 /bin/python3 /usr/bin/salt-master
           |-3214 /bin/python3 /usr/bin/salt-master
           |-3216 /bin/python3 /usr/bin/salt-master
           |-3219 /bin/python3 /usr/bin/salt-master
           |-3220 /bin/python3 /usr/bin/salt-master
           |-3221 /bin/python3 /usr/bin/salt-master
           `-4871 /bin/python3 /usr/bin/salt-master

May 15 16:39:26 ssc-01a.corp.local systemd[1]: Started The Salt Master Server.
May 15 16:39:27 ssc-01a.corp.local salt-master[3035]: [ERROR   ] sseapi_rpc_queue: could not connect to SSE server: [Errno 111] Connection refused
May 15 16:39:27 ssc-01a.corp.local salt-master[3035]: [ERROR   ] sseapi_event_queue: could not connect to SSE server: [Errno 111] Connection refused
May 15 16:39:27 ssc-01a.corp.local salt-master[3035]: [ERROR   ] Failed to get the salt environments: [Errno 111] Connection refused
May 15 16:39:28 ssc-01a.corp.local salt-master[3035]: [ERROR   ] Failed to retrieve commands from SSE: [Errno 111] Connection refused
May 15 16:39:32 ssc-01a.corp.local salt-master[3035]: [ERROR   ] sseapi_rpc_queue: could not connect to SSE server: [Errno 111] Connection refused
May 15 16:39:32 ssc-01a.corp.local salt-master[3035]: [ERROR   ] sseapi_event_queue: could not connect to SSE server: [Errno 111] Connection refused
May 15 16:39:37 ssc-01a.corp.local salt-master[3035]: [ERROR   ] sseapi_rpc_queue: could not connect to SSE server: [Errno 111] Connection refused
May 15 16:39:38 ssc-01a.corp.local salt-master[3035]: [ERROR   ] Failed to retrieve commands from SSE: [Errno 111] Connection refused
May 15 16:39:38 ssc-01a.corp.local salt-master[3035]: [ERROR   ] sseapi_event_queue: could not connect to SSE server: [Errno 111] Connection refused

The way I got around the error was by editing /etc/salt/master.d/raas.conf. The file seems to be missing the keyauth engine entry. The engines section should look like this:

engines:
  - sseapi: {}
  - eventqueue: {}
  - rpcqueue: {}
  - jobcompletion: {}
  - keyauth: {}

After restarting the salt-master I was able to verify that the error was gone. To restart the service I ran:

systemctl restart salt-master

To verify the status I ran:

systemctl status salt-master
* salt-master.service - The Salt Master Server
   Loaded: loaded (/lib/systemd/system/salt-master.service; enabled; vendor preset: enabled)
   Active: active (running) since Sun 2022-05-15 20:02:56 UTC; 51s ago
     Docs: man:salt-master(1)
           file:///usr/share/doc/salt/html/contents.html
           https://docs.saltproject.io/en/latest/contents.html
 Main PID: 31309 (salt-master)
    Tasks: 39 (limit: 9830)
   Memory: 330.0M
   CGroup: /system.slice/salt-master.service
           |-31309 /bin/python3 /usr/bin/salt-master
           |-31315 /bin/python3 /usr/bin/salt-master
           |-31320 /bin/python3 /usr/bin/salt-master
           |-31323 /bin/python3 /usr/bin/salt-master
           |-31325 /bin/python3 /usr/bin/salt-master
           |-31326 /bin/python3 /usr/bin/salt-master
           |-31327 /bin/python3 /usr/bin/salt-master
           |-31328 /bin/python3 /usr/bin/salt-master
           |-31330 /bin/python3 /usr/bin/salt-master
           |-31397 /bin/python3 /usr/bin/salt-master
           |-31398 /bin/python3 /usr/bin/salt-master
           |-31400 /bin/python3 /usr/bin/salt-master
           |-31411 /bin/python3 /usr/bin/salt-master
           |-31412 /bin/python3 /usr/bin/salt-master
           |-31413 /bin/python3 /usr/bin/salt-master
           |-31414 /bin/python3 /usr/bin/salt-master
           |-31415 /bin/python3 /usr/bin/salt-master
           `-31416 /bin/python3 /usr/bin/salt-master

May 15 20:02:54 ssc-01a.corp.local systemd[1]: Starting The Salt Master Server...
May 15 20:02:56 ssc-01a.corp.local systemd[1]: Started The Salt Master Server.

The VMware documentation also covers this procedure in the Upgrade the Master Plugin guide, found here.

If the status check returns something similar to this:

 [py.warnings      :110 ][WARNING ][5488] /usr/lib/python3.7/site-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.25.11) or chardet (4.0.0) doesn't match a supported version!
  RequestsDependencyWarning)

Follow my other guide here

vRSLCM 8.x change admin@local password via API

I recently had a use case where I wanted to change the admin@local LCM password via an API call in order to automate the password rotation.

If you need a guide to get started you can find my other blog here

To change the password we can use a Postman PUT call to https://$vRLCM/lcm/authzn/api/v2/users/password

Don't forget to include the new password in the body, formatted as JSON, for example:
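
The body mirrors the payload used in the curl example below:

{
  "password": "new_password",
  "username": "admin@local"
}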

We can also leverage curl using:

curl -k --location --request PUT 'https://$vRSLCM/lcm/authzn/api/v2/users/password' \
--header 'Accept: application/json' \
--header 'Content-Type: application/json' \
--header 'Authorization: Basic Token' \
--data-raw '{
  "password": "new_password",
  "username": "admin@local"
}'

Don't forget to replace the token with a properly base64-encoded token. Instructions are found on my other blog here.

vRSLCM 8 API getting started

I've been having a hard time finding one article that covers the vRSLCM (vRealize Suite Lifecycle Manager) API. The official documentation can be found here.

As we can see, we can leverage the Swagger UI by going to https://$vRLCM/api/swagger-ui.html, but… I wanted to leverage curl from the CLI or Postman, and as per best practices I wanted to generate a Bearer token.

The first step was to authenticate using the credentials. We can do so in Postman by filling in the Authorization fields with basic auth and running a POST against https://$vRLCM/lcm/authzn/api/login. Example:

If we want to run it via curl, we need to encode the credentials in base64. Luckily there is an easy converter at https://www.base64encode.org/. The format should be username:password.
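
If you prefer to stay on the command line, the same value can be generated locally. The credentials below are the placeholder admin@local:password pair, which produces the exact string used in the curl examples that follow:

echo -n 'admin@local:password' | base64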

Now that we have the encoded version, we can leverage a simple curl command. In my case I also added -k at the end to ignore the invalid SSL certificate.

curl --location --request POST 'https://$vRLCM/lcm/authzn/api/login' \
--header 'Authorization: Basic YWRtaW5AbG9jYWw6cGFzc3dvcmQ=' -k

If the credentials are correct, the command will return a simple login success message.

Now we can use the Authorization header to query different things, like checking the health. Looking at the Swagger UI we can see that we require a GET to /lcm/health/api/v2/status

Example in Postman:

or via curl:

curl -X GET "https://$vRLCM/lcm/health/api/v2/status" -H  "accept: application/json"  --header 'Authorization: Basic YWRtaW5AbG9jYWw6cGFzc3dvcmQ=' -k

Installing a vROPS management pack via vRSLCM

In this post we will be going over installing a vROPS management pack via vRSLCM. (If you haven’t added your My VMware credentials, you will need to do that first by going to vRealize Lifecycle Manager -> Lifecycle Operations -> Settings -> My VMware.)

Once logged on to vRSLCM click on the marketplace.

Alternatively, we can also get to the marketplace via the side menu.

In my case I want to install the SDDC Health Monitoring Solution. Searching for sddc in the search box gives a number of results; in my case the latest version is 8.6.1.

We can click directly on Download, or we can click on View Details to see what the management pack provides.

After clicking on Download we are presented with the EULA. Once reviewed, we can click on Next to enter some user information.

We complete the required fields marked with a red asterisk and click on Download to download the package.

Since the download was very small, it completed relatively quickly in my environment. If we want to see the progress of the download, we can navigate to Lifecycle Operations -> Requests. Once completed, we can come back to the marketplace, where we are presented with an Install button. Click on Install.

Select which environment and datacenter we want to install the management pack in and click on Install.

We can view the progress by clicking on Check Request Status at the bottom of the page.

Once the installation reports as completed we can go to vROPS and verify that it was successfully installed

Navigating to the vROPS repository we can see that the management pack was successfully installed as well as configured

Change Delete old snapshot restriction from 7 days in Automation Central

I recently ran into an issue where I wanted to automatically delete snapshots after 5 days instead of the default 7 days that comes out of the box in Automation Central. I wanted to change the delete old snapshot restriction from 7 days to 5 days.

It seems like the restriction comes from the Reclaim settings. In order to change it, we can go to Optimize -> Reclaim -> Settings.

Change the Snapshots setting's older than value to the desired number of days (5 in my case).

Going to Automation Central, I was able to confirm that the minimum is now 5 days.

vROPS Cloud Proxy docker routing

I recently ran into a problem where I needed to route the subnet that is used for Docker networking on the vROPS Cloud Proxy appliance. The network used is 172.17.0.0/16. For this we will route a portion of that subnet through my Ethernet interface.

To test the configuration out without adding a persistent route i used the following command:

ip route add 172.17.11.0/24 via 172.16.11.1 dev eth0 

Verify that everything is running properly; since the route is not persistent, it will be reverted on the next reboot if anything went wrong. If the changes are successful, we can make the route persistent by editing the network configuration using vi:

vi /etc/systemd/network/10-eth0.network

Next, add a Route section similar to this:

[Route]
Destination=172.17.11.0/24
Gateway=172.16.11.1
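
The appliance uses systemd-networkd (which is why the file above lives under /etc/systemd/network), so a rough way to apply and verify the change without a reboot is:

systemctl restart systemd-networkd
ip route show | grep 172.17.11.0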

Please note that the above is not official guidance from VMware. If you need support please reach out to technical support.

Deploying the vROPS cloud proxy

In this guide we will go over deploying the vROPS cloud proxy for cloud as well as on-premises. The official VMware documentation can be found here.

To get started, log in to your vROPS instance. If it's cloud, it will be something similar to https://www.mgmt.cloud.vmware.com/vrops-cloud/ui/index.action, and for on-premises it would be https://vrops_url/ui/login.action

Once you are logged on, we can go to Data Sources -> Cloud Proxies and press New. Example:

Here we can see the download cloud proxy OVA option as well as a copy button. We can also see the OTK key; keep a note of this as we will need it during the deployment. The first step is to get the proxy deployed, either by downloading the OVA or by copying the URL. In my case I will copy the URL.

For reference you should be at this screen:

Click on the clipboard icon to copy the path to the ova

Next we will go to our vCenter to get it deployed. Go to one of the hosts or clusters, then go to Actions -> Deploy OVF Template… Example:

If you are deploying the cloud proxy for vROPS cloud the URL will look similar to this:

If its for on premise it would look similar to this:

Click on next and accept the certificate thumbprint

Select a name and location where the deployment should go and click on Next

Select a compute resource and click on Next

Review the details of the deployment and click on next

Accept the licensing agreement and click on next

Select a size and click next. If the environment is larger than 8k VMs you would want to deploy the Standard size. The sizing guide can be found here

Select a storage device and click on Next

Select a network and click on Next

Here is where we add that OTK key from earlier. Paste in the OTK key and give the VM a friendly name (this name is what's displayed in the vROPS cloud proxies page). The network proxy settings are only applicable if you need to use a proxy to get out to the internet. The rest of the fields should be pretty self-explanatory.

Verify everything is correct and click on finish

Once the deployment is complete, power on the machine. At this point we need to wait before it appears under the cloud proxies; it took about 20 minutes in my environment before I was able to see it in the vROPS cloud console.

We can check the console while we wait for everything to be provisioned

Once the deployment is complete the console would look similar to this:

We can also see the proxy coming online in the vROPS cloud proxies menu

Once complete the proxy will show as online

The full vROPS cloud documentation can be found here

The full vROPS on premise documentation can be found here

You can request a trial from here

Workaround instructions to address CVE-2021-44228 and CVE-2021-45046 in vRealize Operations 7.x

In this article I will go over one of the workarounds to address CVE-2021-44228 and CVE-2021-45046 in vRealize Operations 7.x. I have tested the workaround on vROPS 7.5, as it's still shipped with VCF 3.x and I haven’t yet seen documentation on a workaround for this version. If you are looking for instructions for version 8.x, consult KB article 87076. This has been tested on December 21, 2021. Please check the official documentation or open a ticket for production usage.

Create a snapshot of the vROPS components to make sure we have something to revert to in case anything were to go wrong.

Log into the vROPS instance admin UI typically https://ip_address/admin and take the cluster offline. This applies to all nodes including but not limited to Analytic, Primary, Replica, Data, Remote Collectors and Witness nodes.

Give a reason and press ok

Verify the cluster is offline before continuing

Log in via SSH and change to a temporary path, e.g. /tmp. Because vROPS 7.5 doesn't come with the newer OpenSSL modules, we need to find other means to get the files onto the server without using a direct download method like wget.
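
One option, if you have the script files staged on another machine that can reach the node, is to copy them over with scp (the hostname below is a placeholder):

scp vrops-log4j-fix.sh cp-log4j-fix.sh data-rc-witness-log4j-fix.sh root@vrops-node:/tmp/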

In my case I put the code below in a file called vrops-log4j-fix.sh in my /tmp directory.

#!/bin/bash

file=/tmp/impacted_jars.txt

echo "Searching for impacted .jar files. Please wait..."

find /usr/lib -type f -name "*.jar" -exec sh -c "zipinfo -1 '{}' | grep "org/apache/logging/log4j/core/lookup/JndiLookup.class" && echo {}" \; | grep "/usr/lib" > $file

line_qty=$(wc -l < $file)

if [ $line_qty -ne 0 ]; then
    echo "Found $line_qty impacted .jar files"
else
    echo "No impacted .jar files found"
    exit 0
fi

echo "Starting to patch impacted .jar files"

while IFS= read -r line;
do     
    echo "patching -> $line"

    own_user=$(stat -c '%U' "$line")
    own_group=$(stat -c '%G' "$line")

    zip -q -d "$line" org/apache/logging/log4j/core/lookup/JndiLookup.class
    if [ $? -ne 0 ]; then echo "ERROR: Fail to Patch $line"; fi

    chown $own_user:$own_group "$line"

done < $file

rm -f $file

Make the file executable by running chmod +x vrops-log4j-fix.sh

And then run the script with ./vrops-log4j-fix.sh

The script will go through and find impacted .jar files and try to patch them. If successful, we should end up with something like this:

Next we will do the same with the cp-log4j-fix.sh file:

#!/bin/bash

#set -x


FAILURE="0"
WRAPPER_FILES="/usr/lib/vmware-vcops/user/conf/collector/wrapper.conf"
for f in $WRAPPER_FILES
    do
        last_idx=""
        if [[ -f $f ]]; then
            echo "********************************"
            echo "Updating file: $f"
            let last_idx=$(egrep "^wrapper.java.additional.[[:digit:]]+=" $f | cut -d= -f1 | awk -F '.' '{print $4}' | sort -n | tail -1)
            if [[ -z $last_idx ]]; then
                echo -e "ERROR: Failed to get JVM additional index"
                let FAILURE="1"
                continue
            fi
            ((last_idx++))
            echo -e "\n#Fixing Apache Log4j2 Remote Code Execution Vulnerability\nwrapper.java.additional.$last_idx=-Dlog4j2.formatMsgNoLookups=true" >> $f
            if [[ $? != 0 ]]; then
                echo -e "ERROR: Failed to update file: $f\n"
                let FAILURE="1"
            else 
                echo -e "Sucessfully updated file: $f\n"
            fi
        else
            echo -e "ERROR: file is not found: $f\n"
            let FAILURE="1"
        fi
    done


CASA_JVM="/usr/lib/vmware-casa/casa-webapp/bin/setenv.sh"
echo "********************************"
echo "Updating file: $CASA_JVM"
echo 'JAVA_OPTS="$JAVA_OPTS -Dlog4j2.formatMsgNoLookups=true"' >> $CASA_JVM
if [[ $? != 0 ]]; then
    echo -e "ERROR: Failed to update file: $CASA_JVM\n"
    let FAILURE="1"
else
    echo -e "Sucessfully updated file: $CASA_JVM\n"
fi

if [[ "X$FAILURE" == "X1" ]]; then
    exit 1
fi

exit 0

And lastly, the data-rc-witness-log4j-fix.sh file:

#!/bin/bash

#set -x


FAILURE="0"
WRAPPER_FILES="/usr/lib/vmware-vcops/user/conf/analytics/wrapper.conf
/usr/lib/vmware-vcops/user/conf/collector/wrapper.conf
/usr/lib/vmware-vcops/user/conf/gemfire/wrapper.conf
/usr/lib/vmware-vcops/user/conf/tomcat-enterprise/wrapper.conf"
for f in $WRAPPER_FILES
    do
        last_idx=""
        if [[ -f $f ]]; then
            echo "********************************"
            echo "Updating file: $f"
            let last_idx=$(egrep "^wrapper.java.additional.[[:digit:]]+=" $f | cut -d= -f1 | awk -F '.' '{print $4}' | sort -n | tail -1)
            if [[ -z $last_idx ]]; then
                echo -e "ERROR: Failed to get JVM additional index"
                let FAILURE="1"
                continue
            fi
            ((last_idx++))
            echo -e "\n#Fixing Apache Log4j2 Remote Code Execution Vulnerability\nwrapper.java.additional.$last_idx=-Dlog4j2.formatMsgNoLookups=true" >> $f
            if [[ $? != 0 ]]; then
                echo -e "ERROR: Failed to update file: $f\n"
                let FAILURE="1"
            else 
                echo -e "Sucessfully updated file: $f\n"
            fi
        else
            echo -e "ERROR: file is not found: $f\n"
            let FAILURE="1"
        fi
    done


CATALINA_FILES="/usr/lib/vmware-casa/casa-webapp/bin/setenv.sh
/usr/lib/vmware-vcops/tomcat-web-app/bin/setenv.sh"

for f in $CATALINA_FILES
    do
        if [[ -f $f ]]; then
            echo "********************************"
            echo "Updating file: $f"
            echo 'JAVA_OPTS="$JAVA_OPTS -Dlog4j2.formatMsgNoLookups=true"' >> $f
            if [[ $? != 0 ]]; then
                echo -e "ERROR: Failed to update file: $f\n"
                let FAILURE="1"
            else 
                echo -e "Sucessfully updated file: $f\n"
            fi
        else
            echo -e "ERROR: file is not found: $f\n"
            let FAILURE="1"
        fi
    done

if [[ "X$FAILURE" == "X1" ]]; then
    exit 1
fi

exit 0
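
As with the first script, these two need to be made executable and then run, mirroring the earlier chmod step:

chmod +x cp-log4j-fix.sh data-rc-witness-log4j-fix.sh
./cp-log4j-fix.sh
./data-rc-witness-log4j-fix.sh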

It would look similar to this in the end

To verify that the workaround for CVE-2021-44228 was applied, run the following:

ps axf | grep --color log4j2.formatMsgNoLookups | grep -v grep

Running ./vrops-log4j-fix.sh again will also verify that there are no .jar files left that need to be patched.

Next, bring the cluster back online in the admin console.