HaLim commited on
Commit
cd87ae5
Β·
0 Parent(s):

first push with real optimizer

Browse files
.gitignore ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .Python
11
+
12
+ # Virtual Environment
13
+ env/
14
+ venv/
15
+ .venv/
16
+ .env
17
+ .ENV/
18
+
19
+ # IDE
20
+ .vscode/
21
+ .idea/
22
+ *.swp
23
+ *.swo
24
+ *~
25
+
26
+ # OS
27
+ .DS_Store
28
+ .DS_Store?
29
+ ._*
30
+ .Spotlight-V100
31
+ .Trashes
32
+ ehthumbs.db
33
+ Thumbs.db
34
+
35
+ # Data files (usually large)
36
+ *.csv
37
+ *.xlsx
38
+ *.xls
39
+ data/*.csv
40
+ data/*.xlsx
41
+ data/*.xls
42
+
43
+ # Results and outputs
44
+ results/
45
+ outputs/
46
+ logs/
47
+ *.log
48
+
49
+ # Jupyter
50
+ .ipynb_checkpoints/
51
+ *.ipynb
52
+
53
+ # Model files
54
+ *.h5
55
+ *.pkl
56
+ *.joblib
57
+
58
+ # Testing
59
+ .pytest_cache/
60
+ .coverage
61
+ htmlcov/
62
+
63
+ # Documentation
64
+ docs/_build/
65
+
66
+ # Large files
67
+ *.zip
68
+ *.tar.gz
69
+ *.rar
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Supply Roster Optimization Tool
2
+
3
+ A Python-based tool for optimizing supply roster scheduling using linear programming.
4
+
5
+ ## Features
6
+
7
+ - Multi-day optimization with employee scheduling
8
+ - Support for multiple product types and shift patterns
9
+ - Configurable constraints for labor costs and productivity
10
+ - OR-Tools integration for optimization
11
+ - Streamlit and Gradio interfaces for visualization
12
+
13
+ ## Project Structure
14
+
15
+ ```
16
+ β”œβ”€β”€ src/
17
+ β”‚ β”œβ”€β”€ config/ # Configuration files
18
+ β”‚ β”œβ”€β”€ models/ # Optimization models
19
+ β”‚ β”œβ”€β”€ utils/ # Utility functions
20
+ β”‚ └── visualization/ # Web interfaces
21
+ β”œβ”€β”€ data/ # Data files (not tracked)
22
+ β”œβ”€β”€ results/ # Output files (not tracked)
23
+ β”œβ”€β”€ requirements.txt # Python dependencies
24
+ β”œβ”€β”€ pyproject.toml # Project configuration
25
+ └── README.md
26
+
27
+ ```
28
+
29
+ ## Setup
30
+
31
+ 1. Create virtual environment:
32
+ ```bash
33
+ python -m venv venv
34
+ source venv/bin/activate # On Windows: venv\Scripts\activate
35
+ ```
36
+
37
+ 2. Install dependencies:
38
+ ```bash
39
+ pip install -r requirements.txt
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ ```bash
45
+ # Run the optimization
46
+ python src/models/optimizer_real.py
47
+
48
+ # Start web interface
49
+ streamlit run src/visualization/Home.py
50
+ ```
51
+
52
+ ## Dependencies
53
+
54
+ - Python >= 3.8
55
+ - OR-Tools >= 9.4.0
56
+ - Pandas >= 1.3.0
57
+ - Streamlit >= 1.18.0
58
+ - Plotly >= 5.8.0
59
+
60
+ ## License
61
+
62
+ MIT License
notebook/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ import src
pyproject.toml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "supply-roster-tool-real"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = [
6
+ {name = "HaLim Jun",email = "[email protected]"}
7
+ ]
8
+ license = {text = "MIT"}
9
+ readme = "README.md"
10
+ requires-python = ">=3.10,<3.11"
11
+ dependencies = [
12
+ "pandas>=2.1.3",
13
+ "or-tools>=10.0.0",
14
+ "numpy>=1.26.4",
15
+ "matplotlib>=3.8.0",
16
+ "seaborn>=0.13.2",
17
+ "scipy>=1.13.0",
18
+ "scikit-learn>=1.3.2",
19
+ "statsmodels>=0.14.4",
20
+ "plotly>=5.19.0",
21
+ ]
22
+
23
+
24
+ [build-system]
25
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
26
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.2.2
2
+ affine==2.4.0
3
+ aiofiles==24.1.0
4
+ aiohappyeyeballs==2.6.1
5
+ aiohttp==3.12.6
6
+ aiosignal==1.3.2
7
+ altair==5.5.0
8
+ annotated-types==0.7.0
9
+ anyio==4.8.0
10
+ appnope==0.1.4
11
+ argon2-cffi==23.1.0
12
+ argon2-cffi-bindings==21.2.0
13
+ arrow==1.3.0
14
+ asgiref==3.8.1
15
+ asttokens==3.0.0
16
+ async-lru==2.0.5
17
+ async-timeout==5.0.1
18
+ attrs==25.3.0
19
+ babel==2.17.0
20
+ backports.tarfile==1.2.0
21
+ beautifulsoup4==4.13.3
22
+ bleach==6.2.0
23
+ blinker==1.9.0
24
+ boto3==1.38.5
25
+ botocore==1.38.5
26
+ branca==0.8.1
27
+ build==1.2.2.post1
28
+ CacheControl==0.14.3
29
+ cachetools==5.5.2
30
+ cdsapi==0.7.6
31
+ certifi==2025.1.31
32
+ cffi==1.17.1
33
+ cftime==1.6.4.post1
34
+ chardet==5.2.0
35
+ charset-normalizer==3.4.1
36
+ cleo==2.1.0
37
+ click==8.1.8
38
+ click-plugins==1.1.1
39
+ cligj==0.7.2
40
+ comm==0.2.2
41
+ contourpy==1.3.1
42
+ crashtest==0.4.1
43
+ cycler==0.12.1
44
+ datasets==3.6.0
45
+ debugpy==1.8.11
46
+ decorator==5.1.1
47
+ defusedxml==0.7.1
48
+ dill==0.3.8
49
+ distlib==0.3.9
50
+ distro==1.9.0
51
+ dulwich==0.22.8
52
+ ecmwf-datastores-client==0.2.0
53
+ et_xmlfile==2.0.0
54
+ exactextract==0.2.1
55
+ exceptiongroup==1.2.2
56
+ executing==2.1.0
57
+ Faker==37.1.0
58
+ fastapi==0.115.12
59
+ fastcore==1.8.0
60
+ fastjsonschema==2.21.1
61
+ fastprogress==1.0.3
62
+ ffmpy==0.5.0
63
+ filelock==3.18.0
64
+ findpython==0.6.3
65
+ fiona==1.10.1
66
+ Flask==3.0.2
67
+ Flask-Cors==4.0.0
68
+ folium==0.19.5
69
+ fonttools==4.56.0
70
+ fqdn==1.5.1
71
+ frozenlist==1.6.0
72
+ fsspec==2025.3.0
73
+ geopandas==1.0.1
74
+ geowrangler==0.5.1
75
+ gitdb==4.0.12
76
+ GitPython==3.1.44
77
+ gradio==5.29.1
78
+ gradio_client==1.10.1
79
+ groovy==0.1.2
80
+ h11==0.14.0
81
+ h3==4.2.2
82
+ httpcore==1.0.7
83
+ httpx==0.28.1
84
+ huggingface-hub==0.31.2
85
+ idna==3.10
86
+ immutabledict==4.2.1
87
+ importlib_metadata==8.7.0
88
+ installer==0.7.0
89
+ ipykernel==6.29.5
90
+ ipython==8.31.0
91
+ ipywidgets==8.1.5
92
+ isoduration==20.11.0
93
+ itsdangerous==2.2.0
94
+ jaraco.classes==3.4.0
95
+ jaraco.context==6.0.1
96
+ jaraco.functools==4.1.0
97
+ jedi==0.19.2
98
+ Jinja2==3.1.5
99
+ jiter==0.9.0
100
+ jmespath==1.0.1
101
+ joblib==1.4.2
102
+ json5==0.12.0
103
+ jsonpointer==3.0.0
104
+ jsonschema==4.23.0
105
+ jsonschema-specifications==2024.10.1
106
+ jupyter==1.1.1
107
+ jupyter-console==6.6.3
108
+ jupyter-events==0.12.0
109
+ jupyter-lsp==2.2.5
110
+ jupyter_client==8.6.3
111
+ jupyter_core==5.7.2
112
+ jupyter_server==2.15.0
113
+ jupyter_server_terminals==0.5.3
114
+ jupyterlab==4.3.6
115
+ jupyterlab_pygments==0.3.0
116
+ jupyterlab_server==2.27.3
117
+ jupyterlab_widgets==3.0.13
118
+ keyring==25.6.0
119
+ kiwisolver==1.4.8
120
+ loguru==0.7.3
121
+ mangum==0.19.0
122
+ markdown-it-py==3.0.0
123
+ MarkupSafe==3.0.2
124
+ matplotlib==3.10.1
125
+ matplotlib-inline==0.1.7
126
+ mdurl==0.1.2
127
+ mistune==3.1.3
128
+ more-itertools==10.7.0
129
+ morecantile==6.2.0
130
+ msgpack==1.1.0
131
+ multidict==6.4.4
132
+ multiprocess==0.70.16
133
+ multiurl==0.3.6
134
+ narwhals==1.35.0
135
+ nbclient==0.10.2
136
+ nbconvert==7.16.6
137
+ nbformat==5.10.4
138
+ nest-asyncio==1.6.0
139
+ netCDF4==1.7.2
140
+ notebook==7.3.3
141
+ notebook_shim==0.2.4
142
+ numpy==2.2.5
143
+ openai==1.66.3
144
+ opencv-python==4.11.0.86
145
+ openpyxl==3.1.5
146
+ orjson==3.10.18
147
+ ortools==9.12.4544
148
+ overrides==7.7.0
149
+ packaging==24.2
150
+ pandas==2.2.3
151
+ pandocfilters==1.5.1
152
+ parso==0.8.4
153
+ patsy==1.0.1
154
+ pbs-installer==2025.5.17
155
+ pexpect==4.9.0
156
+ pillow==11.1.0
157
+ pkginfo==1.12.1.2
158
+ platformdirs==4.3.6
159
+ plotly==6.1.0
160
+ poetry==2.1.3
161
+ poetry-core==2.1.3
162
+ polars==1.26.0
163
+ -e git+https://github.com/halim-jun/StudyMLOps.git@2e1b97c9d8196552a23dd5a4c536f25e53c033dc#egg=project
164
+ prometheus_client==0.21.1
165
+ prompt_toolkit==3.0.48
166
+ propcache==0.3.1
167
+ protobuf==5.29.4
168
+ psutil==6.1.1
169
+ ptyprocess==0.7.0
170
+ pure_eval==0.2.3
171
+ pyarrow==19.0.1
172
+ pycparser==2.22
173
+ pydantic==2.10.6
174
+ pydantic_core==2.27.2
175
+ pydeck==0.9.1
176
+ pydub==0.25.1
177
+ Pygments==2.19.1
178
+ pyogrio==0.10.0
179
+ pyparsing==3.2.3
180
+ pyproj==3.7.1
181
+ pyproject_hooks==1.2.0
182
+ python-dateutil==2.9.0.post0
183
+ python-dotenv==1.1.0
184
+ python-json-logger==3.3.0
185
+ python-multipart==0.0.20
186
+ pytz==2025.2
187
+ PyYAML==6.0.1
188
+ pyzmq==26.2.0
189
+ RapidFuzz==3.13.0
190
+ rasterio==1.4.3
191
+ rasterstats==0.20.0
192
+ referencing==0.36.2
193
+ regex==2024.11.6
194
+ requests==2.32.3
195
+ requests-toolbelt==1.0.0
196
+ rfc3339-validator==0.1.4
197
+ rfc3986-validator==0.1.1
198
+ rich==14.0.0
199
+ rpds-py==0.24.0
200
+ ruff==0.11.10
201
+ s3transfer==0.12.0
202
+ safehttpx==0.1.6
203
+ safetensors==0.5.3
204
+ scikit-learn==1.6.1
205
+ scipy==1.15.2
206
+ seaborn==0.13.2
207
+ semantic-version==2.10.0
208
+ Send2Trash==1.8.3
209
+ sentencepiece==0.2.0
210
+ shapely==2.0.7
211
+ shellingham==1.5.4
212
+ simplejson==3.20.1
213
+ six==1.17.0
214
+ smmap==5.0.2
215
+ sniffio==1.3.1
216
+ soupsieve==2.6
217
+ stack-data==0.6.3
218
+ starlette==0.46.2
219
+ statsmodels==0.14.4
220
+ streamlit==1.44.1
221
+ streamlit_folium==0.25.0
222
+ tenacity==9.1.2
223
+ terminado==0.18.1
224
+ threadpoolctl==3.6.0
225
+ tinycss2==1.4.0
226
+ tokenizers==0.21.1
227
+ toml==0.10.2
228
+ tomli==2.2.1
229
+ tomlkit==0.13.2
230
+ tornado==6.4.2
231
+ tqdm==4.67.1
232
+ traitlets==5.14.3
233
+ transformers==4.52.4
234
+ trove-classifiers==2025.5.9.12
235
+ typer==0.15.4
236
+ types-python-dateutil==2.9.0.20241206
237
+ typing_extensions==4.12.2
238
+ tzdata==2025.2
239
+ uri-template==1.3.0
240
+ urllib3==2.3.0
241
+ uvicorn==0.34.2
242
+ virtualenv==20.31.2
243
+ wcwidth==0.2.13
244
+ webcolors==24.11.1
245
+ webencodings==0.5.1
246
+ websocket-client==1.8.0
247
+ websockets==15.0.1
248
+ Werkzeug==3.1.3
249
+ widgetsnbextension==4.0.13
250
+ xarray==2025.6.1
251
+ xattr==1.1.4
252
+ xxhash==3.5.0
253
+ xyzservices==2025.1.0
254
+ yarl==1.20.0
255
+ zipp==3.21.0
256
+ zstandard==0.23.0
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
src/config/__init__.py ADDED
File without changes
src/config/optimization_config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Static problem data for the supply-roster MILP (consumed by
# src/models/optimizer_real.py).  Values here are illustrative defaults;
# edit them to match the real plant.

# Planning horizon: day indices 1..7 (one week).
DATE_SPAN = list(range(1, 8))
# Product SKUs that may be scheduled during the week.
PRODUCT_LIST = ["a", "b", "c"]
# Employee types; available headcount per type varies by day (see below).
EMPLOYEE_LIST = ["x", "y"]
# Shift identifiers; the optimizer treats 1=usual, 2=overtime, 3=evening.
SHIFT_LIST = [1, 2, 3]
# Production line types and the number of physical lines of each type.
LINE_LIST = ["long", "short"]
LINE_LIST_PER_TYPE = {"long": 2, "short": 3}
# Weekly demand (units) per product.
DEMAND_LIST = {"a": 1000, "b": 600, "c": 400}
# Hourly labor cost, keyed by employee type then shift.
COST_LIST_PER_EMP_SHIFT = {
    "x": {1: 15, 2: 22, 3: 18},
    "y": {1: 19, 2: 27, 3: 23},
}
# Productivity (units per hour), keyed by employee type -> shift -> product.
PRODUCTIVITY_LIST_PER_EMP_PRODUCT = {
    "x": {
        1: {"a": 10, "b": 8, "c": 7},
        2: {"a": 9, "b": 7, "c": 6},
        3: {"a": 9, "b": 7, "c": 6},
    },
    "y": {
        1: {"a": 8, "b": 6, "c": 5},
        2: {"a": 7, "b": 6, "c": 5},
        3: {"a": 7, "b": 6, "c": 5},
    },
}
# Available headcount per employee type on each day of the horizon.
MAX_EMPLOYEE_PER_TYPE_ON_DAY = {
    "x": {t: 5 for t in DATE_SPAN},  # EDIT: e.g., {'x': {1:5,2:5,...}, 'y':{1:6,...}}
    "y": {t: 6 for t in DATE_SPAN},
}

MAX_HOUR_PER_PERSON_PER_DAY = 14  # legal standard
# Maximum hours a single person may work in each shift.
MAX_HOUR_PER_SHIFT_PER_PERSON = {1: 8, 2: 4, 3: 6}
# Physical throughput cap (units/hour) per line, keyed by
# (line type, line index).  Keys mirror the (t, i) pairs the optimizer
# builds from LINE_LIST and LINE_LIST_PER_TYPE: 2 "long" lines, 3 "short".
CAP_PER_LINE_PER_HOUR = {
    ("long", 1): 22,
    ("long", 2): 22,
    ("short", 1): 16,
    ("short", 2): 16,
    ("short", 3): 16,
}
src/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the models directory a Python package
src/models/optimizer_real.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Option A (with lines) + 7-day horizon (weekly demand only)
2
+ # Generalized: arbitrary products (P_all) and day-varying headcount N_day[e][t]
3
+ # -----------------------------------------------------------------------------
4
+ # pip install ortools
5
+ from ortools.linear_solver import pywraplp
6
+ import pandas as pd
7
+ import sys
8
+ import os
9
+
10
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
11
+
12
+ from src.config import optimization_config
13
+
14
+
15
class OptimizerReal:
    """Weekly supply-roster optimizer (Option A: explicit production lines).

    Builds and solves a single MILP over a 7-day horizon that assigns
    worker-hours (by employee type, shift, product and physical line) so
    that weekly product demand is met at minimum total labor cost.  All
    problem data is read from ``src.config.optimization_config``.
    """

    def __init__(self):
        # Module holding all model parameters (sets, demand, costs, caps).
        self.config = optimization_config

    def solve_option_A_multi_day_generalized(self):
        """Build the MILP, solve it with CBC, and print a solution report.

        Returns:
            None.  Results are printed to stdout; the method returns early
            (after printing the status) when no optimal solution is found.

        Raises:
            RuntimeError: If the OR-Tools CBC solver cannot be created.
            ValueError: If neither per-day nor per-week fixed regular hours
                are configured for employee type "x" on shift 1.
        """
        # -----------------------------
        # 1) SETS
        # -----------------------------
        # Days
        D = self.config.DATE_SPAN

        # Products (master set; you can have many)
        # Fill with all SKUs that may appear over the week
        P_all = self.config.PRODUCT_LIST  # EDIT: add/remove products freely

        # Employee types (fixed to two types x,y; headcount varies by day)
        E = self.config.EMPLOYEE_LIST

        # Shifts: 1=usual, 2=overtime, 3=evening
        S = self.config.SHIFT_LIST

        # Line types and explicit line list
        T_line = self.config.LINE_LIST
        K = self.config.LINE_LIST_PER_TYPE  # number of physical lines per type (EDIT)
        L = [
            (t, i) for t in T_line for i in range(1, K[t] + 1)
        ]  # pair of line type and line number (e.g., ('long', 1))

        # -----------------------------
        # 2) PARAMETERS (EDIT THESE)
        # -----------------------------
        # Weekly demand (units) for each product in P_all
        d_week = self.config.DEMAND_LIST

        # Daily activity toggle for each product (1=can be produced on day t; 0=cannot)
        # If a product is not active on a day, we force its production and hours to 0 that day.
        active = {
            t: {p: 1 for p in P_all} for t in D
        }  # EDIT per day if some SKUs are not available

        # Per-hour labor cost by employee type & shift
        c = self.config.COST_LIST_PER_EMP_SHIFT

        # Productivity q[e][s][p] = units per hour (assumed line-independent here)
        # Provide entries for ALL products in P_all
        q = self.config.PRODUCTIVITY_LIST_PER_EMP_PRODUCT
        # If productivity depends on line, switch to q_line[(e,s,p,ell)] and use it in constraints.

        # Day-varying available headcount per type
        # N_day[e][t] = number of employees of type e available on day t
        N_day = self.config.MAX_EMPLOYEE_PER_TYPE_ON_DAY

        # Limits
        Hmax_daily_per_person = (
            self.config.MAX_HOUR_PER_PERSON_PER_DAY
        )  # per person per day
        Hmax_s = self.config.MAX_HOUR_PER_SHIFT_PER_PERSON  # per-shift hour caps
        # Per-line unit/hour capacity (physical)
        Cap = self.config.CAP_PER_LINE_PER_HOUR

        # Fixed regular hours for type x on shift 1
        # Choose either PER-DAY values or a single PER-WEEK total.
        # Common in practice: per-day fixed hours (regulars show up daily).
        F_x1_day = {
            t: 8 * N_day["x"][t] for t in D
        }  # EDIT if different from "all regulars do full usual shift"
        F_x1_week = None  # e.g., sum(F_x1_day.values()) if you want weekly instead (then set F_x1_day=None)

        # Optional skill/compatibility: allow[(e,p,ell)] = 1/0 (1=allowed; 0=forbid)
        allow = {}
        for e in E:
            for p in P_all:
                for ell in L:
                    allow[(e, p, ell)] = 1  # EDIT as needed

        # -----------------------------
        # 3) SOLVER
        # -----------------------------
        solver = pywraplp.Solver.CreateSolver("CBC")  # or 'SCIP' if available
        if not solver:
            raise RuntimeError("Failed to create solver. Check OR-Tools installation.")
        INF = solver.infinity()

        # -----------------------------
        # 4) DECISION VARIABLES
        # -----------------------------
        # h[e,s,p,ell,t] = worker-hours of type e on shift s for product p on line ell on day t (integer)
        h = {}
        for e in E:
            for s in S:
                for p in P_all:
                    for ell in L:
                        for t in D:
                            # Upper bound per (e,s,t): shift cap * available headcount that day
                            ub = Hmax_s[s] * N_day[e][t]
                            h[e, s, p, ell, t] = solver.IntVar(
                                0, ub, f"h_{e}_{s}_{p}_{ell[0]}{ell[1]}_d{t}"
                            )

        # u[p,ell,s,t] = units of product p produced on line ell during shift s on day t
        u = {}
        for p in P_all:
            for ell in L:
                for s in S:
                    for t in D:
                        u[p, ell, s, t] = solver.NumVar(
                            0, INF, f"u_{p}_{ell[0]}{ell[1]}_{s}_d{t}"
                        )

        # tline[ell,s,t] = operating hours of line ell during shift s on day t
        tline = {}
        for ell in L:
            for s in S:
                for t in D:
                    tline[ell, s, t] = solver.NumVar(
                        0, Hmax_s[s], f"t_{ell[0]}{ell[1]}_{s}_d{t}"
                    )

        # ybin[e,s,t] = shift usage binaries per type/day (to gate OT after usual)
        ybin = {}
        for e in E:
            for s in S:
                for t in D:
                    ybin[e, s, t] = solver.BoolVar(f"y_{e}_{s}_d{t}")

        # -----------------------------
        # 5) OBJECTIVE: Minimize total labor cost over the week
        # -----------------------------
        solver.Minimize(
            solver.Sum(
                c[e][s] * h[e, s, p, ell, t]
                for e in E
                for s in S
                for p in P_all
                for ell in L
                for t in D
            )
        )

        # -----------------------------
        # 6) CONSTRAINTS
        # -----------------------------

        # 6.1 Weekly demand (no daily demand)
        for p in P_all:
            solver.Add(
                solver.Sum(u[p, ell, s, t] for ell in L for s in S for t in D)
                >= d_week.get(p, 0)
            )

        # 6.2 If a product is inactive on a day, force zero production and hours for that day
        # This makes "varying products per day" explicit.
        # NOTE(review): BIG_H is computed but never used below — either a
        # leftover from a Big-M formulation or dead code; confirm intent.
        BIG_H = max(Hmax_s.values()) * sum(N_day[e][t] for e in E for t in D)
        for p in P_all:
            for t in D:
                if active[t][p] == 0:
                    for ell in L:
                        for s in S:
                            solver.Add(u[p, ell, s, t] == 0)
                            # NOTE(review): the e-loop must sit inside the
                            # s-loop (h is indexed by s); verify against the
                            # original indentation, which this rendering lost.
                            for e in E:
                                solver.Add(h[e, s, p, ell, t] == 0)

        # 6.3 Labor -> units (per line/shift/day)
        # If productivity depends on line, swap q[e][s][p] with q_line[(e,s,p,ell)] here.
        for p in P_all:
            for ell in L:
                for s in S:
                    for t in D:
                        # Gate by activity (if inactive, both sides are already 0 from 6.2)
                        solver.Add(
                            u[p, ell, s, t]
                            <= solver.Sum(q[e][s][p] * h[e, s, p, ell, t] for e in E)
                        )

        # 6.4 Per-line throughput cap (units/hour Γ— line-hours)
        for ell in L:
            for s in S:
                for t in D:
                    solver.Add(
                        solver.Sum(u[p, ell, s, t] for p in P_all)
                        <= Cap[ell] * tline[ell, s, t]
                    )

        # 6.5 Couple line hours & worker-hours (single-operator lines β†’ tight equality)
        for ell in L:
            for s in S:
                for t in D:
                    solver.Add(
                        tline[ell, s, t]
                        == solver.Sum(h[e, s, p, ell, t] for e in E for p in P_all)
                    )
        # If multi-operator lines (up to Wmax[ell] concurrent workers), replace above with:
        # Wmax = {ell: 2, ...}
        # for ell in L:
        #     for s in S:
        #         for t in D:
        #             solver.Add(
        #                 solver.Sum(h[e, s, p, ell, t] for e in E for p in P_all) <= Wmax[ell] * tline[ell, s, t]
        #             )

        # 6.6 Fixed regular hours for type x on shift 1
        if F_x1_day is not None:
            # Per-day fixed hours
            for t in D:
                solver.Add(
                    solver.Sum(h["x", 1, p, ell, t] for p in P_all for ell in L)
                    == F_x1_day[t]
                )
        elif F_x1_week is not None:
            # Per-week fixed hours
            solver.Add(
                solver.Sum(h["x", 1, p, ell, t] for p in P_all for ell in L for t in D)
                == F_x1_week
            )
        else:
            raise ValueError(
                "Specify either F_x1_day (dict by day) or F_x1_week (scalar)."
            )

        # 6.7 Daily hours cap per employee type (14h per person per day)
        for e in E:
            for t in D:
                solver.Add(
                    solver.Sum(
                        h[e, s, p, ell, t] for s in S for p in P_all for ell in L
                    )
                    <= Hmax_daily_per_person * N_day[e][t]
                )

        # 6.8 Link hours to shift-usage binaries (per type/day)
        # Use a type/day-specific Big-M: M_e_s_t = Hmax_s[s] * N_day[e][t]
        for e in E:
            for s in S:
                for t in D:
                    M_e_s_t = Hmax_s[s] * N_day[e][t]
                    solver.Add(
                        solver.Sum(h[e, s, p, ell, t] for p in P_all for ell in L)
                        <= M_e_s_t * ybin[e, s, t]
                    )

        # 6.9 Overtime only after usual (per day). Also bound OT hours <= usual hours
        for e in E:
            for t in D:
                solver.Add(ybin[e, 2, t] <= ybin[e, 1, t])
                solver.Add(
                    solver.Sum(h[e, 2, p, ell, t] for p in P_all for ell in L)
                    <= solver.Sum(h[e, 1, p, ell, t] for p in P_all for ell in L)
                )
        # (Optional) evening only after usual:
        # for e in E:
        #     for t in D:
        #         solver.Add(ybin[e, 3, t] <= ybin[e, 1, t])

        # 6.10 Skill/compatibility mask
        for e in E:
            for p in P_all:
                for ell in L:
                    if allow[(e, p, ell)] == 0:
                        for s in S:
                            for t in D:
                                solver.Add(h[e, s, p, ell, t] == 0)

        # -----------------------------
        # 7) SOLVE
        # -----------------------------
        status = solver.Solve()
        # NOTE(review): FEASIBLE (non-proven-optimal) solutions are also
        # rejected here; confirm that is intended for long CBC runs.
        if status != pywraplp.Solver.OPTIMAL:
            print("No optimal solution. Status:", status)
            return

        # -----------------------------
        # 8) REPORT
        # -----------------------------
        print("Objective (min cost):", solver.Objective().Value())

        print("\n--- Weekly production by product ---")
        for p in P_all:
            produced = sum(
                u[p, ell, s, t].solution_value() for ell in L for s in S for t in D
            )
            print(f"{p}: {produced:.1f} (weekly demand {d_week.get(p,0)})")

        print("\n--- Line operating hours by shift/day ---")
        for ell in L:
            for s in S:
                hours = [tline[ell, s, t].solution_value() for t in D]
                if sum(hours) > 1e-6:
                    print(
                        f"Line {ell} Shift {s}: "
                        + ", ".join([f"D{t}={hours[t-1]:.2f}h" for t in D])
                    )

        print("\n--- Hours by employee type / shift / day ---")
        for e in E:
            for s in S:
                day_hours = [
                    sum(h[e, s, p, ell, t].solution_value() for p in P_all for ell in L)
                    for t in D
                ]
                if sum(day_hours) > 1e-6:
                    print(
                        f"e={e}, s={s}: "
                        + ", ".join([f"D{t}={day_hours[t-1]:.2f}h" for t in D])
                    )

        print("\n--- Implied headcount by type / shift / day ---")
        for e in E:
            for s in S:
                row = []
                for t in D:
                    hours = sum(
                        h[e, s, p, ell, t].solution_value() for p in P_all for ell in L
                    )
                    need = int((hours + Hmax_s[s] - 1) // Hmax_s[s])  # ceil
                    row.append(f"D{t}={need}")
                if any("=0" not in x for x in row):
                    print(f"e={e}, s={s}: " + ", ".join(row))
332
+
333
+
334
if __name__ == "__main__":
    # Entry point: build the weekly roster model and print the report.
    OptimizerReal().solve_option_A_multi_day_generalized()
src/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Utility functions and classes for the supply roster tool."""
src/utils/excel_to_csv_converter.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ from pathlib import Path
4
+
5
def analyze_excel_structure(excel_path):
    """
    Analyze the structure of an Excel file and return sheet information.

    Args:
        excel_path (str): Path to the Excel file

    Returns:
        dict: Mapping of sheet name -> {'rows', 'columns', 'column_names'},
            or None when the file cannot be read.
    """
    try:
        workbook = pd.ExcelFile(excel_path)
    except Exception as e:
        print(f"❌ Error analyzing Excel file: {e}")
        return None

    try:
        print(f"πŸ“Š Analyzing Excel file: {excel_path}")
        print(f"πŸ“‹ Found {len(workbook.sheet_names)} sheets:")
        print("-" * 50)

        sheet_info = {}
        for idx, name in enumerate(workbook.sheet_names, 1):
            # Load the sheet so its dimensions and headers can be reported.
            frame = pd.read_excel(excel_path, sheet_name=name)
            sheet_info[name] = {
                'rows': len(frame),
                'columns': len(frame.columns),
                'column_names': list(frame.columns),
            }
            print(f"{idx}. Sheet: '{name}'")
            print(f" - Rows: {len(frame)}")
            print(f" - Columns: {len(frame.columns)}")
            print(f" - Column names: {list(frame.columns)}")
            print()

        return sheet_info
    except Exception as e:
        print(f"❌ Error analyzing Excel file: {e}")
        return None
45
+
46
def convert_excel_to_csv(excel_path, output_dir=None):
    """
    Convert each sheet of an Excel file to a separate CSV file.

    Args:
        excel_path (str): Path to the Excel file
        output_dir (str): Output directory for CSV files. If None, uses same directory as Excel file

    Returns:
        list[str] | None: Paths of the CSV files written, or None on error.
    """
    try:
        # Default the destination to the Excel file's own directory.
        destination = output_dir if output_dir is not None else os.path.dirname(excel_path)
        output_dir = destination
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        workbook = pd.ExcelFile(excel_path)

        print(f"πŸ”„ Converting Excel sheets to CSV...")
        print(f"πŸ“ Output directory: {output_dir}")
        print("-" * 50)

        written = []
        for idx, name in enumerate(workbook.sheet_names, 1):
            frame = pd.read_excel(excel_path, sheet_name=name)

            # Keep only filesystem-safe characters, then normalize spaces.
            kept = [c for c in name if c.isalnum() or c in (' ', '-', '_')]
            stem = "".join(kept).rstrip().replace(' ', '_')
            csv_filename = f"{stem}.csv"
            csv_path = os.path.join(output_dir, csv_filename)

            frame.to_csv(csv_path, index=False, encoding='utf-8')
            written.append(csv_path)

            print(f"βœ… {idx}. '{name}' β†’ {csv_filename}")
            print(f" - Saved {len(frame)} rows, {len(frame.columns)} columns")

        print(f"\nπŸŽ‰ Successfully converted {len(written)} sheets to CSV files!")
        return written

    except Exception as e:
        print(f"❌ Error converting Excel to CSV: {e}")
        return None
94
+
95
def main():
    """Main function to analyze and convert Excel file"""
    # Source workbook and CSV destination (relative to the project root).
    excel_path = "data/real_data_excel/AI Project document.xlsx"
    output_dir = "data/converted_csv"

    # Bail out early when the workbook is missing.
    if not os.path.exists(excel_path):
        print(f"❌ Excel file not found: {excel_path}")
        return

    print("=" * 60)
    print("πŸ“Š EXCEL TO CSV CONVERTER")
    print("=" * 60)

    # Step 1: inspect the workbook; abort if it cannot be read.
    if analyze_excel_structure(excel_path) is None:
        return

    # Step 2: write one CSV per sheet and list what was produced.
    converted_files = convert_excel_to_csv(excel_path, output_dir)
    if converted_files:
        print("\nπŸ“‚ Converted files:")
        for file_path in converted_files:
            print(f" - {file_path}")

if __name__ == "__main__":
    main()
src/utils/file_utils.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+
4
def save_dataframes(dataframes, target, save_path):
    """
    Save pandas DataFrames to CSV files.

    Args:
        dataframes (dict): Dictionary mapping dataset names to DataFrames
        target (str): Target dataset name to save, or 'all' to save all
        save_path (str): Path to save the CSV files

    Raises:
        ValueError: If *target* is neither 'all' nor a key of *dataframes*.
    """
    os.makedirs(save_path, exist_ok=True)

    # Normalize the selection to a list of (name, frame) pairs, then write.
    if target == "all":
        selected = list(dataframes.items())
    elif target in dataframes:
        selected = [(target, dataframes[target])]
    else:
        raise ValueError(f"Unknown target: {target}")

    for name, frame in selected:
        frame.to_csv(os.path.join(save_path, f"{name}.csv"), index=False)
src/visualization/Home.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit home page for the Supply Roster Optimization Tool.

Configures the page, seeds the session state shared by all pages
(data path and target date), and renders the sidebar with global
settings plus quick-navigation buttons.
"""
import streamlit as st

# Page configuration
st.set_page_config(
    page_title="Supply Roster Tool",
    page_icon="🏠",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Initialize session state for shared variables.
# Other pages read these keys, so they must exist before navigation.
if 'data_path' not in st.session_state:
    st.session_state.data_path = "data/my_roster_data"
if 'target_date' not in st.session_state:
    st.session_state.target_date = ""

# Main page content
st.title("🏠 Supply Roster Optimization Tool")
st.markdown("---")

# Welcome section


col1, col2 = st.columns([1, 1])

with col1:
    st.markdown("""
    ## πŸ“‹ Welcome to Supply Roster Tool

    """)

with col2:
    # NOTE(review): path is relative to the directory Streamlit is launched
    # from — confirm the images/ folder exists at runtime.
    st.image("images/POC_page/POC_SupplyRoster_image.png",
             caption="Supply Roster Tool Overview",
             use_container_width=True)

# Global settings in sidebar
with st.sidebar:
    st.markdown("## 🌐 Global Settings")
    st.markdown("The setting will be shared across all pages")

    # Data path setting
    new_data_path = st.text_input(
        "πŸ“ Data Path",
        value=st.session_state.data_path,
        help="The data path will be shared across all pages"
    )

    if new_data_path != st.session_state.data_path:
        st.session_state.data_path = new_data_path
        st.success("βœ… Data path updated!")

    st.markdown(f"**Current data path:** `{st.session_state.data_path}`")

    # Quick navigation
    # NOTE(review): the page file on disk is pages/1_optimize_viz.py — this
    # target may need the "1_" prefix for st.switch_page to resolve; confirm.
    st.markdown("## 🧭 Quick Navigation")
    if st.button("🎯 Go to Optimization", use_container_width=True):
        st.switch_page("pages/optimize_viz.py")

    if st.button("πŸ“Š Go to Dataset Overview", use_container_width=True):
        st.switch_page("pages/metadata.py")

# Main content area
st.markdown("---")


# Footer
st.markdown("---")
st.markdown("""
<div style='text-align: center; color: gray;'>
    <small>Supply Roster Optimization Tool | Built with Streamlit</small>
</div>
""", unsafe_allow_html=True)
src/visualization/pages/1_optimize_viz.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import plotly.express as px
6
+ from datetime import datetime
7
+
8
+ # Add parent directory to path to import LaborOptimizer
9
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
10
+ from optimization.labor_optimizer import LaborOptimizer
11
+
12
+
13
+
14
+
15
def get_available_dates(data_path):
    """Return the distinct order due dates found in ``<data_path>/orders.csv``.

    The dates come back as ``datetime.date`` objects sorted newest-first.
    An empty list is returned when the file is absent, has no ``due_date``
    column, or fails to parse (parse errors are surfaced via ``st.error``).
    """
    try:
        csv_path = os.path.join(data_path, "orders.csv")
        if os.path.exists(csv_path):
            frame = pd.read_csv(csv_path)
            if "due_date" in frame.columns:
                # Normalise to calendar days and drop duplicates before sorting.
                day_values = pd.to_datetime(frame["due_date"]).dt.date.unique()
                return sorted(day_values, reverse=True)
    except Exception as exc:
        st.error(f"Error loading dates: {str(exc)}")
    return []
30
+
31
+
32
def get_metadata_stats(optimizer, target_date=None):
    """
    Aggregate metadata statistics about employee costs and availability.

    Args:
        optimizer: LaborOptimizer instance (provides employee_types_df,
            employees_df, employee_availability_df, orders_df and
            _get_shift_hours()).
        target_date: Target date for availability analysis.  When falsy the
            most recent order due date is used and a Streamlit warning is
            emitted.

    Returns:
        dict: Keys 'costs_data', 'shift_data', 'availability_data' and
        'overall_stats', or None if any step raised (the error is shown
        via st.error).
    """
    try:
        # Per-type cost table; monetary columns are pre-formatted as "$x.xx"
        # strings because they are rendered directly in a dataframe.
        employee_types_df = optimizer.employee_types_df
        costs_data = []
        for _, row in employee_types_df.iterrows():
            costs_data.append({
                'Employee Type': row['type_name'].title(),
                'Usual Cost ($/hr)': f"${row['usual_cost']:.2f}",
                'Overtime Cost ($/hr)': f"${row['overtime_cost']:.2f}",
                'Evening Shift Cost ($/hr)': f"${row['evening_shift_cost']:.2f}",
                'Max Hours': row['max_hours'],
                'Unit Manpower/Hr': row['unit_productivity_per_hour']
            })

        # Shift duration table from the optimizer's (private) shift config.
        shift_hours = optimizer._get_shift_hours()
        shift_data = []
        for shift_type, hours in shift_hours.items():
            shift_data.append({
                'Shift Type': shift_type.replace('_', ' ').title(),
                'Duration (hours)': f"{hours:.1f}"
            })

        # Employee availability for the target date.
        availability_data = []
        if target_date:
            target_date_str = pd.to_datetime(target_date).strftime("%Y-%m-%d")
        else:
            # Fall back to the most recent due date, and warn the user that
            # the availability numbers are for that implicit date.
            target_date_str = pd.to_datetime(optimizer.orders_df["due_date"]).max().strftime("%Y-%m-%d")
            st.warning("⚠️ No target date specified. Using the most recent order date for analysis. Please select a specific target date for accurate availability data.")

        # NOTE(review): assumes the availability 'date' column holds
        # "YYYY-MM-DD" strings; a datetime-typed column would never match —
        # confirm against the CSV loader.
        availability_target_date = optimizer.employee_availability_df[
            optimizer.employee_availability_df["date"] == target_date_str
        ]

        # Left-join so employees with no availability row still appear (as NaN).
        employee_availability = optimizer.employees_df.merge(
            availability_target_date, left_on="id", right_on="employee_id", how="left"
        )

        for emp_type in optimizer.employee_types_df["type_name"]:
            emp_type_data = employee_availability[
                employee_availability["type_name"] == emp_type
            ]

            if not emp_type_data.empty:
                # Availability flags are summed: assumes 0/1 indicator columns.
                first_shift_available = emp_type_data["first_shift_available"].sum()
                second_shift_available = emp_type_data["second_shift_available"].sum()
                overtime_available = emp_type_data["overtime_available"].sum()
                total_employees = len(emp_type_data)
            else:
                first_shift_available = second_shift_available = overtime_available = total_employees = 0

            availability_data.append({
                'Employee Type': emp_type.title(),
                'Total Employees': total_employees,
                'Usual Time Available': first_shift_available,
                'Evening Shift Available': second_shift_available,
                'Overtime Available': overtime_available
            })

        # Headline counts for the metric row.
        total_employees = len(optimizer.employees_df)
        total_employee_types = len(optimizer.employee_types_df)
        total_orders = len(optimizer.orders_df)

        return {
            'costs_data': costs_data,
            'shift_data': shift_data,
            'availability_data': availability_data,
            'overall_stats': {
                'Total Employees': total_employees,
                'Employee Types': total_employee_types,
                'Total Orders': total_orders,
                'Analysis Date': target_date_str,
                # Lets the UI flag that the date was defaulted, not chosen.
                'is_default_date': not bool(target_date)
            }
        }

    except Exception as e:
        st.error(f"Error generating metadata: {str(e)}")
        return None
125
+
126
+
127
def display_metadata_section(metadata):
    """Render the dataset-overview expander from a get_metadata_stats() dict.

    Silently renders nothing when metadata is None/empty (e.g. after an
    upstream load error).
    """
    if not metadata:
        return

    # The whole overview is collapsible so it doesn't crowd the results page.
    with st.expander("📊 Dataset Overview", expanded=False):
        # Headline metrics for the chosen analysis date.
        st.write("Information on the date chosen - not an optimization report")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Employees Available", metadata['overall_stats']['Total Employees'])
        with col2:
            st.metric("Employee Types Available", metadata['overall_stats']['Employee Types'])
        with col3:
            st.metric("Total Orders", metadata['overall_stats']['Total Orders'])
        with col4:
            analysis_date = metadata['overall_stats']['Analysis Date']
            if metadata['overall_stats'].get('is_default_date', False):
                # Flag that the date was defaulted rather than user-selected.
                st.metric("Analysis Date", f"{analysis_date} ⚠️", help="Using most recent order date - select specific date for accurate analysis")
            else:
                st.metric("Analysis Date", analysis_date)

        # One tab per metadata category.
        tab1, tab2, tab3 = st.tabs(["💰 Employee Costs", "🕐 Shift Information", "👥 Availability"])

        with tab1:
            st.subheader("Employee Type Costs")
            costs_df = pd.DataFrame(metadata['costs_data'])
            st.dataframe(costs_df, use_container_width=True)

            # The cost columns are "$x.xx" strings; strip the "$" back off
            # to get numeric values for the grouped bar chart.
            costs_for_chart = []
            for item in metadata['costs_data']:
                emp_type = item['Employee Type']
                costs_for_chart.extend([
                    {'Employee Type': emp_type, 'Cost Type': 'Usual', 'Cost': float(item['Usual Cost ($/hr)'].replace('$', ''))},
                    {'Employee Type': emp_type, 'Cost Type': 'Overtime', 'Cost': float(item['Overtime Cost ($/hr)'].replace('$', ''))},
                    {'Employee Type': emp_type, 'Cost Type': 'Evening', 'Cost': float(item['Evening Shift Cost ($/hr)'].replace('$', ''))}
                ])

            chart_df = pd.DataFrame(costs_for_chart)
            fig = px.bar(chart_df, x='Employee Type', y='Cost', color='Cost Type',
                         title='Hourly Costs by Employee Type and Shift',
                         barmode='group')
            st.plotly_chart(fig, use_container_width=True)

        with tab2:
            st.subheader("Shift Duration Information")
            shift_df = pd.DataFrame(metadata['shift_data'])
            st.dataframe(shift_df, use_container_width=True)

            # NOTE(review): 'Duration (hours)' is a formatted string, so the
            # y-axis sorts lexicographically — confirm whether that matters.
            fig2 = px.bar(shift_df, x='Shift Type', y='Duration (hours)',
                          title='Shift Duration by Type')
            st.plotly_chart(fig2, use_container_width=True)

        with tab3:
            st.subheader("Employee Availability")
            availability_df = pd.DataFrame(metadata['availability_data'])
            st.dataframe(availability_df, use_container_width=True)
204
+
205
+
206
def main():
    """Streamlit entry point for the optimization page.

    Collects the data path, target date and optional per-type worker caps in
    the sidebar, runs LaborOptimizer on demand, and renders the allocation
    and cost visualizations.  When "Show Dataset Overview" is checked the
    dataset metadata section is rendered below the results.

    BUG FIX vs. original: the show_metadata branch previously built the
    optimizer and showed a tip but never called get_metadata_stats() /
    display_metadata_section(), so the checkbox had no visible effect.
    """
    st.set_page_config(page_title="Labor Optimization Tool", layout="wide")
    st.title("Labor Optimization Visualization Tool")

    # Initialize session state shared with the other pages.
    if 'data_path' not in st.session_state:
        st.session_state.data_path = "data/my_roster_data"

    # Sidebar for inputs.
    with st.sidebar:
        st.header("Optimization Parameters")
        data_path = st.text_input("Data Path", value=st.session_state.data_path)
        # Keep session state in sync so other pages see the edited path.
        st.session_state.data_path = data_path

        # Offer only dates that actually appear in the orders file.
        available_dates = get_available_dates(data_path)

        if available_dates:
            date_options = [""] + [str(date) for date in available_dates]
            target_date = st.selectbox(
                "Target Date (select empty for latest date)",
                options=date_options,
                index=0,
            )
            st.session_state.target_date = target_date
        else:
            # No parsable dates: fall back to free-text entry.
            # (get_available_dates always returns a list, so the original
            # `if available_dates == []` guard here was always true.)
            target_date = st.text_input(
                "Target Date (YYYY-MM-DD, leave empty for latest)"
            )
            st.warning("No order dates found in dataset. Check the data path.")

        st.header("Advanced Options")
        st.caption("Set to 0 to use all available workers")
        max_workers_permanent = st.number_input(
            "Max Permanent Workers", min_value=0, value=0
        )
        max_workers_contract = st.number_input(
            "Max Contract Workers", min_value=0, value=0
        )
        max_workers_temporary = st.number_input(
            "Max Temporary Workers", min_value=0, value=0
        )

        show_metadata = st.checkbox("Show Dataset Overview", value=True)
        optimize_btn = st.button("Run Optimization")

    # Main area for optimization results.
    if optimize_btn:
        try:
            with st.spinner("Running optimization..."):
                optimizer = LaborOptimizer(data_path)

                # Only pass caps the user actually set; 0 means "no limit".
                max_workers_override = {}
                if max_workers_permanent > 0:
                    max_workers_override["permanent"] = max_workers_permanent
                if max_workers_contract > 0:
                    max_workers_override["contract"] = max_workers_contract
                if max_workers_temporary > 0:
                    max_workers_override["temporary"] = max_workers_temporary
                if not max_workers_override:
                    max_workers_override = None

                results = optimizer.optimize(target_date, max_workers_override)

                # The optimizer reports infeasibility/config problems as a
                # plain string rather than raising.
                if isinstance(results, str):
                    st.error(results)
                else:
                    with st.expander("🎯 Optimization Results", expanded=True):
                        summary_col, allocation_col = st.columns([1, 1])

                        with summary_col:
                            st.subheader("Optimization Summary")
                            st.write(f"**Target Date:** {results['target_date']}")
                            st.write(
                                f"**Total Labor Hours:** {results['total_labor_hours_needed']:.2f}"
                            )
                            st.write(f"**Total Cost:** ${results['total_cost']:.2f}")

                        with allocation_col:
                            st.subheader("Employee Allocation")
                            allocation_data = results["allocation"]

                            # type -> {shift -> workers}, as an integer table.
                            allocation_df = pd.DataFrame.from_dict(
                                {
                                    emp_type: {
                                        shift: int(val) for shift, val in shifts.items()
                                    }
                                    for emp_type, shifts in allocation_data.items()
                                },
                                orient="index",
                            )
                            allocation_df.index.name = "Employee Type"
                            allocation_df.columns = [
                                col.replace("_", " ").title()
                                for col in allocation_df.columns
                            ]

                            st.dataframe(allocation_df)

                        st.subheader("Cost Visualization")

                        # Loop invariants hoisted out of the per-type loop
                        # (the original re-read these on every iteration).
                        shift_hours = results["shift_hours"]
                        costs = optimizer.employee_types_df.set_index("type_name")
                        shift_cost_mapping = {
                            "usual_time": "usual_cost",
                            "overtime": "overtime_cost",
                            "evening_shift": "evening_shift_cost",
                        }

                        cost_data = []
                        for emp_type, shifts in allocation_data.items():
                            for shift in shifts:
                                cost = (
                                    shifts[shift]
                                    * shift_hours[shift]
                                    * costs.loc[emp_type, shift_cost_mapping[shift]]
                                )
                                if cost > 0:  # Only add non-zero costs
                                    cost_data.append(
                                        {
                                            "Employee Type": emp_type.title(),
                                            "Shift": shift.replace("_", " ").title(),
                                            "Cost": cost,
                                            "Workers": int(shifts[shift]),
                                        }
                                    )

                        cost_df = pd.DataFrame(cost_data)

                        col1, col2 = st.columns([3, 2])

                        with col1:
                            # Bar chart for costs.
                            if not cost_df.empty:
                                fig = px.bar(
                                    cost_df,
                                    x="Shift",
                                    y="Cost",
                                    color="Employee Type",
                                    title="Labor Cost by Employee Type and Shift",
                                    labels={"Cost": "Cost ($)"},
                                )
                                st.plotly_chart(fig, use_container_width=True)

                        with col2:
                            # Pie chart for total cost by employee type.
                            if not cost_df.empty:
                                total_by_type = (
                                    cost_df.groupby("Employee Type")["Cost"]
                                    .sum()
                                    .reset_index()
                                )
                                fig2 = px.pie(
                                    total_by_type,
                                    values="Cost",
                                    names="Employee Type",
                                    title="Total Cost by Employee Type",
                                )
                                st.plotly_chart(fig2, use_container_width=True)

                        # Worker allocation visualization.
                        st.subheader("Worker Allocation")
                        worker_data = [
                            {
                                "Employee Type": emp_type.title(),
                                "Shift": shift.replace("_", " ").title(),
                                "Workers": int(count),
                            }
                            for emp_type, shifts in allocation_data.items()
                            for shift, count in shifts.items()
                            if count > 0  # Only add non-zero allocations
                        ]

                        worker_df = pd.DataFrame(worker_data)

                        if not worker_df.empty:
                            fig3 = px.bar(
                                worker_df,
                                x="Shift",
                                y="Workers",
                                color="Employee Type",
                                title="Worker Allocation by Shift and Employee Type",
                                barmode="group",
                            )
                            st.plotly_chart(fig3, use_container_width=True)

        except Exception as e:
            st.error(f"Error: {str(e)}")
            st.exception(e)

    # Dataset overview, rendered below any optimization results.
    if show_metadata:
        try:
            optimizer = LaborOptimizer(data_path)

            if not target_date:
                st.info("💡 **Tip**: Select a specific target date from the sidebar to see accurate availability data for that date. Currently showing data for the most recent order date.")

            # FIX: actually render the overview (previously missing).
            metadata = get_metadata_stats(optimizer, target_date if target_date else None)
            display_metadata_section(metadata)

        except Exception as e:
            st.error(f"Error loading metadata: {str(e)}")
421
+
422
+
423
# Entry point: Streamlit executes the module top-to-bottom, so this fires
# both under `streamlit run` and when the file is run directly.
if __name__ == "__main__":
    main()
src/visualization/pages/2_metadata.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import plotly.express as px
6
+ from datetime import datetime
7
+
8
+ # Add parent directory to path to import LaborOptimizer
9
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+ from optimization.labor_optimizer import LaborOptimizer
11
+
12
+
13
def get_available_dates(data_path):
    """List the unique order due dates in ``<data_path>/orders.csv``.

    Returns ``datetime.date`` objects, most recent first, or an empty list
    when the file/column is missing or loading fails (failures are reported
    with ``st.error``).
    """
    try:
        orders_csv = os.path.join(data_path, "orders.csv")
        if not os.path.exists(orders_csv):
            return []
        orders = pd.read_csv(orders_csv)
        if "due_date" not in orders.columns:
            return []
        # Collapse timestamps to calendar days, dedupe, newest first.
        unique_days = pd.to_datetime(orders["due_date"]).dt.date.unique()
        return sorted(unique_days, reverse=True)
    except Exception as err:
        st.error(f"Error loading dates: {str(err)}")
        return []
28
+
29
+
30
def get_metadata_stats(optimizer, target_date=None):
    """
    Aggregate metadata statistics about employee costs and availability.

    Duplicated from pages/1_optimize_viz.py; keep the two copies in sync
    (ideally extract into a shared module).

    Args:
        optimizer: LaborOptimizer instance
        target_date: Target date for availability analysis; falsy means
            "use the most recent order due date" (a warning is shown).

    Returns:
        dict with keys 'costs_data', 'shift_data', 'availability_data',
        'overall_stats', or None on error (error shown via st.error).
    """
    try:
        # Per-type cost rows; money columns pre-formatted as "$x.xx" strings.
        employee_types_df = optimizer.employee_types_df
        costs_data = []
        for _, row in employee_types_df.iterrows():
            costs_data.append({
                'Employee Type': row['type_name'].title(),
                'Usual Cost ($/hr)': f"${row['usual_cost']:.2f}",
                'Overtime Cost ($/hr)': f"${row['overtime_cost']:.2f}",
                'Evening Shift Cost ($/hr)': f"${row['evening_shift_cost']:.2f}",
                'Max Hours': row['max_hours'],
                'Unit Manpower/Hr': row['unit_productivity_per_hour']
            })

        # Shift durations from the optimizer's (private) shift config.
        shift_hours = optimizer._get_shift_hours()
        shift_data = []
        for shift_type, hours in shift_hours.items():
            shift_data.append({
                'Shift Type': shift_type.replace('_', ' ').title(),
                'Duration (hours)': f"{hours:.1f}"
            })

        # Availability for the analysis date (explicit or defaulted).
        availability_data = []
        if target_date:
            target_date_str = pd.to_datetime(target_date).strftime("%Y-%m-%d")
        else:
            # Default to the most recent due date and tell the user.
            target_date_str = pd.to_datetime(optimizer.orders_df["due_date"]).max().strftime("%Y-%m-%d")
            st.warning("⚠️ No target date specified. Using the most recent order date for analysis. Please select a specific target date for accurate availability data.")

        # NOTE(review): string comparison assumes 'date' holds "YYYY-MM-DD"
        # strings — confirm with the loader.
        availability_target_date = optimizer.employee_availability_df[
            optimizer.employee_availability_df["date"] == target_date_str
        ]

        # Left-join keeps employees without an availability row (NaN flags).
        employee_availability = optimizer.employees_df.merge(
            availability_target_date, left_on="id", right_on="employee_id", how="left"
        )

        for emp_type in optimizer.employee_types_df["type_name"]:
            emp_type_data = employee_availability[
                employee_availability["type_name"] == emp_type
            ]

            if not emp_type_data.empty:
                # Summing assumes 0/1 indicator columns.
                first_shift_available = emp_type_data["first_shift_available"].sum()
                second_shift_available = emp_type_data["second_shift_available"].sum()
                overtime_available = emp_type_data["overtime_available"].sum()
                total_employees = len(emp_type_data)
            else:
                first_shift_available = second_shift_available = overtime_available = total_employees = 0

            availability_data.append({
                'Employee Type': emp_type.title(),
                'Total Employees': total_employees,
                'Usual Time Available': first_shift_available,
                'Evening Shift Available': second_shift_available,
                'Overtime Available': overtime_available
            })

        # Headline counts for the metric row.
        total_employees = len(optimizer.employees_df)
        total_employee_types = len(optimizer.employee_types_df)
        total_orders = len(optimizer.orders_df)

        return {
            'costs_data': costs_data,
            'shift_data': shift_data,
            'availability_data': availability_data,
            'overall_stats': {
                'Total Employees': total_employees,
                'Employee Types': total_employee_types,
                'Total Orders': total_orders,
                'Analysis Date': target_date_str,
                # Lets the UI flag a defaulted (not user-chosen) date.
                'is_default_date': not bool(target_date)
            }
        }

    except Exception as e:
        st.error(f"Error generating metadata: {str(e)}")
        return None
123
+
124
+
125
def display_metadata_section(metadata):
    """Render the dataset-overview expander from a get_metadata_stats() dict.

    Renders nothing when metadata is None/empty.  Duplicated from
    pages/1_optimize_viz.py except for the extra data-path line.
    """
    if not metadata:
        return

    # Collapsible so it doesn't crowd the page.
    with st.expander("📊 Dataset Overview", expanded=False):
        st.write(f"Data path: {st.session_state.data_path}")
        # Headline metrics for the chosen analysis date.
        st.write("Information on the date chosen - not an optimization report")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Employees Available", metadata['overall_stats']['Total Employees'])
        with col2:
            st.metric("Employee Types Available", metadata['overall_stats']['Employee Types'])
        with col3:
            st.metric("Total Orders", metadata['overall_stats']['Total Orders'])
        with col4:
            analysis_date = metadata['overall_stats']['Analysis Date']
            if metadata['overall_stats'].get('is_default_date', False):
                # Flag a defaulted date rather than a user-selected one.
                st.metric("Analysis Date", f"{analysis_date} ⚠️", help="Using most recent order date - select specific date for accurate analysis")
            else:
                st.metric("Analysis Date", analysis_date)

        # One tab per metadata category.
        tab1, tab2, tab3 = st.tabs(["💰 Employee Costs", "🕐 Shift Information", "👥 Availability"])

        with tab1:
            st.subheader("Employee Type Costs")
            costs_df = pd.DataFrame(metadata['costs_data'])
            st.dataframe(costs_df, use_container_width=True)

            # Strip the "$" formatting to get numeric costs for the chart.
            costs_for_chart = []
            for item in metadata['costs_data']:
                emp_type = item['Employee Type']
                costs_for_chart.extend([
                    {'Employee Type': emp_type, 'Cost Type': 'Usual', 'Cost': float(item['Usual Cost ($/hr)'].replace('$', ''))},
                    {'Employee Type': emp_type, 'Cost Type': 'Overtime', 'Cost': float(item['Overtime Cost ($/hr)'].replace('$', ''))},
                    {'Employee Type': emp_type, 'Cost Type': 'Evening', 'Cost': float(item['Evening Shift Cost ($/hr)'].replace('$', ''))}
                ])

            chart_df = pd.DataFrame(costs_for_chart)
            fig = px.bar(chart_df, x='Employee Type', y='Cost', color='Cost Type',
                         title='Hourly Costs by Employee Type and Shift',
                         barmode='group')
            st.plotly_chart(fig, use_container_width=True)

        with tab2:
            st.subheader("Shift Duration Information")
            shift_df = pd.DataFrame(metadata['shift_data'])
            st.dataframe(shift_df, use_container_width=True)

            # NOTE(review): 'Duration (hours)' is a formatted string, so the
            # axis sorts lexicographically — confirm whether that matters.
            fig2 = px.bar(shift_df, x='Shift Type', y='Duration (hours)',
                          title='Shift Duration by Type')
            st.plotly_chart(fig2, use_container_width=True)

        with tab3:
            st.subheader("Employee Availability")
            availability_df = pd.DataFrame(metadata['availability_data'])
            st.dataframe(availability_df, use_container_width=True)
205
def display_demand(optimizer):
    """Render the order-demand expander: a per-day demand bar chart plus the
    raw order rows for the session's selected target date.

    Args:
        optimizer: LaborOptimizer instance; only its ``orders_df`` is read.
    """
    with st.expander("📊 Demand", expanded=False):
        demand_df = optimizer.orders_df
        st.header("Demand")
        # FIX: select the column before summing.  `groupby(...).sum()` over the
        # whole frame tries to sum every column and raises TypeError on
        # non-numeric columns under pandas >= 2.0 (and is wasteful before).
        daily_demand = (
            demand_df.groupby('date_of_order')['order_amount'].sum().reset_index()
        )
        st.plotly_chart(
            px.bar(daily_demand, x='date_of_order', y='order_amount', title='Demand by Date'),
            use_container_width=True,
        )
        st.markdown("### Demand for the selected date")
        st.dataframe(
            demand_df[demand_df['date_of_order'] == st.session_state.target_date],
            use_container_width=True,
        )
213
+
214
def display_employee_availability(optimizer):
    """Render the employee-availability expander for the session's target date.

    Shows the per-employee availability rows joined with employee details,
    plus a grouped bar chart of available head-count per type and shift.

    Args:
        optimizer: LaborOptimizer instance; reads ``employee_availability_df``
            and ``employees_df``.
    """
    with st.expander("👥 Employee Availability", expanded=False):
        st.header("Employee Availability")
        # FIX: work on a copy.  The original assigned the parsed 'date'
        # column back into optimizer.employee_availability_df, mutating the
        # optimizer's shared state as a side effect of rendering.
        availability = optimizer.employee_availability_df.copy()
        availability['date'] = pd.to_datetime(availability['date'])
        on_date = availability[availability['date'] == st.session_state.target_date]
        on_date = pd.merge(
            on_date, optimizer.employees_df,
            left_on='employee_id', right_on='id', how='left',
        )
        st.dataframe(
            on_date[['name', 'employee_id', 'type_name',
                     'first_shift_available', 'second_shift_available',
                     'overtime_available']],
            use_container_width=True,
        )
        # Head-count per type and shift (assumes 0/1 indicator columns).
        available_employee_grouped = on_date.groupby('type_name')[
            ['first_shift_available', 'second_shift_available', 'overtime_available']
        ].sum().reset_index()

        st.markdown("### Employee Availability for the selected date")
        # Grouped (non-stacked) bars: one cluster per employee type.
        fig = px.bar(
            available_employee_grouped.melt(
                id_vars=['type_name'], var_name='shift_type', value_name='count'
            ),
            x='type_name',
            y='count',
            color='shift_type',
            barmode='group',
            title='Available Employee Count by Type and Shift',
            labels={'type_name': 'Employee Type', 'count': 'Available Count',
                    'shift_type': 'Shift Type'},
        )
        st.plotly_chart(fig, use_container_width=True)
244
+
245
+
246
+
247
def main():
    """Metadata page: dataset overview, demand and availability sections.

    Uses the data path and target date shared across pages via
    ``st.session_state``.

    BUG FIX vs. original: when either key was missing from session state the
    code only called ``st.error`` and left ``data_path``/``target_date``
    unbound, so the page crashed with NameError a few lines later.  We now
    fall back to the app-wide default path and to "" (latest date).
    """
    st.set_page_config(page_title="Dataset Metadata", layout="wide")
    st.title("📊 Dataset Metadata Overview")

    # Data path shared from the home / optimization pages.
    if 'data_path' in st.session_state:
        data_path = st.session_state.data_path
        st.sidebar.info(f"📁 Using shared data path: `{data_path}`")
    else:
        st.error("No data path found. Please select a data path in the sidebar.")
        # Fall back to the default used by the other pages so we can proceed.
        data_path = "data/my_roster_data"
        st.session_state.data_path = data_path

    # Target date shared from the optimization page; "" means "latest".
    if 'target_date' in st.session_state:
        target_date = st.session_state.target_date
        st.sidebar.info(f"📅 Using shared target date: `{target_date}`")
    else:
        st.error("No target date found. Please select a target date in the sidebar.")
        target_date = ""
        st.session_state.target_date = target_date

    try:
        optimizer = LaborOptimizer(data_path)

        if not target_date:
            st.info("💡 **Tip**: Select a specific target date from the sidebar to see accurate availability data for that date. Currently showing data for the most recent order date.")

        metadata = get_metadata_stats(optimizer, target_date if target_date else None)
        display_metadata_section(metadata)
        display_demand(optimizer)
        display_employee_availability(optimizer)
    except Exception as e:
        st.error(f"Error loading metadata: {str(e)}")
297
+
298
+
299
# Entry point: Streamlit executes the module top-to-bottom, so this fires
# both under `streamlit run` and when the file is run directly.
if __name__ == "__main__":
    main()