Posted 18 hours ago · Job ID: 2109118 · 12 quotes received

Databricks: Upload Workbook to SharePoint

Fixed Price · Under $250

  Send before: August 06, 2025


Programming & Development · Programming & Software

The Databricks notebook below, "Workbooks.py", uses a metadata dictionary (control_metadata) to dynamically create multiple worksheets across two Excel workbooks, workbook_staff and workbook_member. Each workbook is saved to a specified .xlsx file path and securely uploaded to both Azure Blob Storage and SharePoint Online. All worksheets are password-protected, and data rows are programmatically hidden to prevent casual access or modification.
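
For context, the hide-and-protect idiom the notebook relies on boils down to two XlsxWriter calls per sheet. A minimal, self-contained sketch (file name and password are placeholders, not values from the notebook):

import xlsxwriter

# Minimal sketch: hide data rows and protect the sheet so users cannot
# simply unhide them. File name and password are placeholders.
workbook = xlsxwriter.Workbook("demo.xlsx")
sheet = workbook.add_worksheet("Demo")

sheet.write(0, 0, "header")  # visible header row
for r in range(1, 6):
    sheet.write(r, 0, f"row {r}")
    sheet.set_row(r, None, None, {"hidden": True})  # hide each data row

# Without sheet protection, the hidden state is trivially reversible in Excel.
sheet.protect(password="example-password")
workbook.close()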


Issue: When the spreadsheets are reopened via SharePoint, the previously hidden rows become visible again, and worksheet protection appears to be lost, allowing users to view and potentially edit the contents.
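
One way to narrow this down is to compare the file as originally written against the copy downloaded back from SharePoint. A minimal diagnostic sketch using openpyxl (the file path is a placeholder):

import openpyxl

# Check whether sheet protection and hidden-row flags survived the
# round trip; the path is a placeholder for the downloaded copy.
wb = openpyxl.load_workbook("downloaded_copy.xlsx")
for ws in wb.worksheets:
    hidden = [r for r, dim in ws.row_dimensions.items() if dim.hidden]
    print(ws.title, "protected:", ws.protection.sheet, "hidden rows:", len(hidden))

If the flags are intact in the downloaded copy, the file itself is fine and the problem lies in how it is reopened; if they are missing, something in the save/upload path is rewriting the file.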

workbooks.py:

import os
import json
import logging
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql.functions import col   # col() lives in pyspark.sql.functions, not pyspark.sql
from datetime import datetime
import xlsxwriter

from azure.storage.blob import BlobServiceClient
from office365.runtime.auth.client_credential import ClientCredential  # replaces azure.identity's ClientSecretCredential for SharePoint auth below
from office365.sharepoint.client_context import ClientContext


# Setup logging
log = logging.getLogger("Pipeline-bcp_plus_critical_member_data")
log.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("[%(asctime)s] [%(levelname)s] - %(message)s"))
log.addHandler(handler)
log.info("Starting Pipeline bcp_plus_critical_member_data")

# Load JSON config (absolute workspace path; a relative path resolves against
# the notebook's working directory and may not be found)
config_path = "/Workspace/Shared/bcp/config.json"
with open(config_path, encoding="utf-8") as f:
    config = json.load(f)


# Read config values
file_path                        = config["file_path"]
staff_file_name                  = config["staff_file_name"]
member_file_name                 = config["member_file_name"]
output_container_name            = config["output_container_name"]
output_blob_staff                = config["output_blob_staff"]
output_blob_member               = config["output_blob_member"]
output_storage_connection_string = config["output_storage_connection_string"]
sp_file_path_staff               = config["SharePointPathStaff"]
sp_file_path_member              = config["SharePointPathMember"]
url                              = config["url"]
client_id                        = config["client_id"]


# Spark + secrets
spark = SparkSession.builder.getOrCreate()

# Workbook password from the Databricks secret scope
workbook_protection_password = dbutils.secrets.get("mdp-secrets", key="workbook-password")

# Retrieve the Unity Catalog service credential (managed identity)
uc_credential              = "bac-mdp-dev-mel"
managedidentity_credential = dbutils.credentials.getServiceCredentialsProvider(uc_credential)

# Blob service client. When a credential object is supplied, account_url must
# be the account URL (https://<account>.blob.core.windows.net), not a
# connection string; connection strings go through
# BlobServiceClient.from_connection_string() instead.
output_blob_service_client = BlobServiceClient(
    account_url=output_storage_connection_string,
    credential=managedidentity_credential
)

xlsx_path_staff  = os.path.join(file_path, staff_file_name)
xlsx_path_member = os.path.join(file_path, member_file_name)


# -------------------------------------------------------------------
# Intended: file-open encryption + workbook structure protection
# -------------------------------------------------------------------
workbook_staff = xlsxwriter.Workbook(
    xlsx_path_staff,
    {
        'constant_memory': True,                  # streaming mode; reduces memory use
        'strings_to_numbers': True,
        'password': workbook_protection_password, # intended file-open encryption
        'workbook_properties': {                  # intended structure lock
            'protection': {
                'password':       workbook_protection_password,
                'lock_structure': True
            }
        }
    }
)
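
# NOTE (assumption worth verifying): 'password' and 'workbook_properties' are
# not documented options of the XlsxWriter Workbook() constructor, and
# XlsxWriter does not implement file-open (AES) encryption; unrecognized
# constructor options are silently ignored. If so, the only protection the
# script actually applies is the per-sheet worksheet.protect() call below.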


workbook_member = xlsxwriter.Workbook(
    xlsx_path_member,
    {
        'constant_memory': True,                  # streaming mode; reduces memory use
        'strings_to_numbers': True,
        'password': workbook_protection_password, # intended file-open encryption
        'workbook_properties': {                  # intended structure lock
            'protection': {
                'password':       workbook_protection_password,
                'lock_structure': True
            }
        }
    }
)


# Cell sanitizer: coerce numpy/pandas values into types XlsxWriter can write
def sanitize(val):
    if isinstance(val, np.ndarray):
        return json.dumps(val.tolist())
    if isinstance(val, np.generic):
        return val.item()
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    # Containers must be checked before pd.isna(): pd.isna() on a list
    # returns an element-wise array, which is ambiguous in a bool context.
    if isinstance(val, (dict, list)):
        return json.dumps(val)
    if pd.isna(val):
        return None
    return val
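
# For illustration, expected behaviour of sanitize():
#   sanitize(np.int64(7))      -> 7 (plain Python int)
#   sanitize(pd.NaT)           -> None
#   sanitize({"k": 1})         -> '{"k": 1}'
#   sanitize(np.array([1, 2])) -> '[1, 2]'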


# Control metadata: drives worksheet creation per workbook
control_metadata = [
    {"worksheet_name": "MmeberAccount1", "rows_per_worksheet": 1000000,
     "extract_script": "select * from dev.TableMmeberAccount", "file_type": "Staff"},
    {"worksheet_name": "MmeberAccount2", "rows_per_worksheet": 1000000,
     "extract_script": "select * from dev.TableMmeberAccount", "file_type": "Staff"},
    {"worksheet_name": "MmeberAccount3", "rows_per_worksheet": 1000000,
     "extract_script": "select * from dev.TableMmeberAccount", "file_type": "member"},
    {"worksheet_name": "MmeberAccount4", "rows_per_worksheet": 1000000,
     "extract_script": "select * from dev.TableMmeberAccount", "file_type": "member"}
]
metadata_rows = control_metadata


for row in metadata_rows:
    ws_name   = (row["worksheet_name"] or "Sheet")[:25]  # leave room for the "_N" part suffix within Excel's 31-char sheet-name limit
    script    = row["extract_script"]
    max_rows  = int(row["rows_per_worksheet"])
    file_type = row["file_type"].strip().lower()

    # Pick the right workbook
    workbook = workbook_staff if file_type == "staff" else workbook_member

    try:
        log.info(f"Processing {file_type} worksheet: {ws_name}")
        df = spark.sql(script)

        # Cast timestamps to string
        for field in df.schema.fields:
            if field.dataType.simpleString().startswith("timestamp"):
                df = df.withColumn(field.name, col(field.name).cast("string"))

        pdf = df.toPandas()
        total_rows = len(pdf)
        parts = (total_rows // max_rows) + (1 if total_rows % max_rows else 0)
        log.info(f"Pandas shape: {pdf.shape}, splitting into {parts} part(s)")


        for part in range(parts):
            start, end = part * max_rows, min((part + 1) * max_rows, total_rows)
            sheet_name = f"{ws_name}_{part+1}"[:31]
            sheet = workbook.add_worksheet(sheet_name)

            # Write headers
            for ci, cname in enumerate(pdf.columns):
                sheet.write(0, ci, cname)

            # Write data rows, hiding each one as it is written
            for ri, row_vals in enumerate(pdf.iloc[start:end].values, start=1):
                for ci, cell in enumerate(row_vals):
                    sheet.write(ri, ci, sanitize(cell))
                sheet.set_row(ri, None, None, {'hidden': True})

            # Protect the sheet so the hidden rows stay hidden and cells stay locked
            sheet.protect(password=workbook_protection_password)

    except Exception as e:
        log.error(f"Error in worksheet {ws_name}: {e}", exc_info=True)
        continue


# Close the workbooks; XlsxWriter only writes the .xlsx files to disk on close()
workbook_staff.close()
workbook_member.close()
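
# NOTE (assumption, not verified here): if genuine file-open encryption is a
# requirement, it would have to be applied to the finished .xlsx files as a
# separate post-processing step (e.g. with a tool such as msoffcrypto-tool),
# since XlsxWriter itself does not encrypt files.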


# Upload to ADLS (blob paths assume the output_blob_* config values end with "/")
for fname, xpath, blob_name in [
    (staff_file_name,  xlsx_path_staff,  output_blob_staff  + staff_file_name),
    (member_file_name, xlsx_path_member, output_blob_member + member_file_name)
]:
    blob_client = output_blob_service_client.get_blob_client(
        container=output_container_name,
        blob=blob_name
    )
    with open(xpath, "rb") as f:
        blob_client.upload_blob(f, overwrite=True)
        log.info(f"Uploaded {fname} to {output_container_name}/{blob_name}")


# SharePoint upload
client_secret = dbutils.secrets.get("mdp-secrets", key="sp-client-secret")

# with_credentials() expects the office365 library's own ClientCredential
# (app-only auth against the site URL), not azure.identity's
# ClientSecretCredential, so no tenant id is needed here.
sp_cred = ClientCredential(client_id, client_secret)
ctx     = ClientContext(url).with_credentials(sp_cred)
ctx.web.get().execute_query()   # cheap round trip to confirm authentication works

folder_staff  = ctx.web.get_folder_by_server_relative_url(sp_file_path_staff)
folder_member = ctx.web.get_folder_by_server_relative_url(sp_file_path_member)

# Download the same blobs that were uploaded above (prefix + file name),
# then push the bytes to SharePoint.
for fname, blob_name, folder in [
    (staff_file_name,  output_blob_staff  + staff_file_name,  folder_staff),
    (member_file_name, output_blob_member + member_file_name, folder_member),
]:
    blob_client = output_blob_service_client.get_blob_client(
        container=output_container_name,
        blob=blob_name
    )
    data = blob_client.download_blob().readall()
    upload_file = folder.files.add(fname, data, overwrite=True).execute_query()
    log.info(f"Uploaded {fname} to SharePoint at: {upload_file.serverRelativeUrl}")

    

    



Praveen K · Australia