XLS Microsoft Excel CDF document

AI-powered detection and analysis of Microsoft Excel CDF document files.

📂 Document

🏷️ .xls

🎯 application/vnd.ms-excel

🔍

Instant XLS File Detection

Use our advanced AI-powered tool to instantly detect and analyze Microsoft Excel CDF document files with precision and speed.

File Information

File Description

Microsoft Excel CDF document

XLS (Excel 97-2003 Binary Format)

Overview

Microsoft Excel Binary File Format (XLS) is the legacy spreadsheet format that was the default in Microsoft Excel 97 through Excel 2003. It stores spreadsheet data as binary BIFF records inside a Compound Document Format (CDF, also known as OLE2) container, supporting multiple worksheets, formulas, formatting, and macros.

Technical Specifications

Format Type: Binary spreadsheet format
File Extension: .xls
MIME Type: application/vnd.ms-excel
Container: Compound Document Format (CDF/OLE2)
Maximum Rows: 65,536 (2^16)
Maximum Columns: 256 (columns A through IV)
Maximum Cells: 16,777,216
File Size Limit: ~2GB

Format Structure

XLS files use the Compound Document Format with:

Directory structure for organizing streams
Workbook stream containing spreadsheet data
Record-based binary structure (BIFF format)
Formula records with binary encoding
Formatting and style information
Optional VBA macro storage

History and Development

1987: Excel 2.0 introduced .xls format
1990: Excel 3.0 expanded capabilities
1992: Excel 4.0 added macro support
1995: Excel 95 (version 7.0) significant updates
1997: Excel 97 introduced BIFF8 format
2007: Excel 2007 replaced XLS with XLSX as the default format, while still being able to open and save XLS files

Use Cases

Legacy Excel spreadsheet files
Financial models and calculations
Data analysis and reporting
Business process automation with macros
Compatibility with older Excel versions
Enterprise systems integration

Code Examples

Python XLS Processing with xlrd/xlwt

import xlrd
import xlwt
from datetime import datetime, date
import os

class XLSProcessor:
    def __init__(self):
        """Create a processor with no workbook loaded yet."""
        # Assigned by the methods that open a workbook with xlrd.
        self.workbook = None
    
    def read_xls_file(self, filepath):
        """Read and analyze XLS file."""
        try:
            self.workbook = xlrd.open_workbook(filepath, formatting_info=True)
            
            info = {
                'filename': os.path.basename(filepath),
                'sheets': [],
                'creation_date': None,
                'last_modified': None,
                'has_macros': self.workbook.book_type == xlrd.XL_WORKBOOK_BIFF8,
                'codepage': self.workbook.codepage
            }
            
            # Process each worksheet
            for sheet_idx in range(self.workbook.nsheets):
                sheet = self.workbook.sheet_by_index(sheet_idx)
                sheet_info = self.analyze_sheet(sheet)
                info['sheets'].append(sheet_info)
            
            return info
            
        except Exception as e:
            print(f"Error reading XLS file: {e}")
            return None
    
    def analyze_sheet(self, sheet):
        """Collect size, type-count and sample statistics for one worksheet.

        Only the first 1000 rows are scanned to bound the cost on large
        sheets.

        Args:
            sheet: The xlrd sheet to analyze.

        Returns:
            A dict with the keys ``name``, ``rows``, ``cols``,
            ``cells_with_data``, ``formulas``, ``data_types`` (counts per
            type name) and ``sample_data`` (formatted non-empty values of
            the first 10 rows, one list per row that has data).
        """
        sheet_info = {
            'name': sheet.name,
            'rows': sheet.nrows,
            'cols': sheet.ncols,
            'cells_with_data': 0,
            'formulas': 0,
            'data_types': {
                'text': 0,
                'number': 0,
                'date': 0,
                'boolean': 0,
                'error': 0,
                'empty': 0
            },
            'sample_data': []
        }
        counts = sheet_info['data_types']

        # xlrd reports dates through their own cell type rather than as
        # numbers that need a separate "is this a date" test.
        type_names = {
            xlrd.XL_CELL_TEXT: 'text',
            xlrd.XL_CELL_NUMBER: 'number',
            xlrd.XL_CELL_DATE: 'date',
            xlrd.XL_CELL_BOOLEAN: 'boolean',
            xlrd.XL_CELL_ERROR: 'error',
        }
        # Blank cells carry formatting but no value, so they count as empty.
        no_data_types = (xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK)

        # xlrd's Sheet API does not expose cell formulas; formulas are only
        # counted when the sheet object happens to offer ``cell_formula``.
        formula_lookup = getattr(sheet, 'cell_formula', None)

        # Analyze cell data
        for row in range(min(sheet.nrows, 1000)):  # Limit analysis for performance
            sample_row = []
            for col in range(sheet.ncols):
                cell = sheet.cell(row, col)

                if cell.ctype in no_data_types:
                    counts['empty'] += 1
                    continue

                sheet_info['cells_with_data'] += 1

                type_name = type_names.get(cell.ctype)
                if type_name is not None:
                    counts[type_name] += 1

                if formula_lookup is not None and formula_lookup(row, col):
                    sheet_info['formulas'] += 1

                # Collect sample data (first 10 rows)
                if row < 10:
                    sample_row.append(
                        self.format_cell_value(cell, self.workbook.datemode)
                    )

            # Appending whole rows (instead of indexing sample_data by the
            # sheet row number) keeps this safe when earlier rows are empty.
            if sample_row:
                sheet_info['sample_data'].append(sample_row)

        return sheet_info
    
    def format_cell_value(self, cell, datemode):
        """Convert an xlrd cell into a display-friendly Python value.

        Args:
            cell: The xlrd cell to format.
            datemode: The workbook's ``datemode``, needed to decode dates.

        Returns:
            ``""`` for empty/blank cells, the raw value for text and number
            cells, a ``"%Y-%m-%d %H:%M:%S"`` string for dates (``"%H:%M:%S"``
            for time-only values), a ``bool`` for boolean cells,
            ``"ERROR: <code>"`` for error cells and ``str(value)`` otherwise.
        """
        ctype = cell.ctype

        if ctype in (xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK):
            return ""
        if ctype == xlrd.XL_CELL_TEXT:
            return cell.value
        if ctype == xlrd.XL_CELL_DATE:
            # xlrd flags date cells with XL_CELL_DATE; plain numbers are
            # never dates and need no extra check.
            try:
                date_tuple = xlrd.xldate.xldate_as_tuple(cell.value, datemode)
            except xlrd.xldate.XLDateError:
                # Out-of-range or ambiguous serial: fall back to the raw number.
                return cell.value
            if date_tuple[0] == 0:
                # Time-only values come back with a zero date part, which
                # datetime() cannot represent.
                return "{:02d}:{:02d}:{:02d}".format(*date_tuple[3:])
            return datetime(*date_tuple).strftime("%Y-%m-%d %H:%M:%S")
        if ctype == xlrd.XL_CELL_NUMBER:
            return cell.value
        if ctype == xlrd.XL_CELL_BOOLEAN:
            return bool(cell.value)
        if ctype == xlrd.XL_CELL_ERROR:
            return f"ERROR: {cell.value}"
        return str(cell.value)
    
    def convert_to_csv(self, xls_path, output_dir=None):
        """Write every sheet of an XLS file to its own UTF-8 CSV file.

        Args:
            xls_path: Path of the ``.xls`` file to convert.
            output_dir: Directory for the CSV files; defaults to the
                directory of ``xls_path``. It is created when missing.

        Returns:
            The list of CSV paths written, one per sheet, named
            ``<xls base name>_<sheet name>.csv``.
        """
        import csv

        # Always open the requested file: reusing a workbook cached from an
        # earlier call would silently convert the wrong file's data.
        self.workbook = xlrd.open_workbook(xls_path)

        if not output_dir:
            output_dir = os.path.dirname(xls_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        base_name = os.path.splitext(os.path.basename(xls_path))[0]
        datemode = self.workbook.datemode
        csv_files = []

        for sheet_idx in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(sheet_idx)

            # Create CSV filename (path separators are not valid in a file name)
            sheet_name = sheet.name.replace('/', '_').replace('\\', '_')
            csv_filename = f"{base_name}_{sheet_name}.csv"
            csv_path = os.path.join(output_dir, csv_filename)

            # Write CSV
            with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.writer(csvfile)

                for row in range(sheet.nrows):
                    writer.writerow([
                        self.format_cell_value(sheet.cell(row, col), datemode)
                        for col in range(sheet.ncols)
                    ])

            csv_files.append(csv_path)
            print(f"Converted sheet '{sheet.name}' to {csv_path}")

        return csv_files
    
    def extract_formulas(self, xls_path):
        """Extract all formulas from an XLS file.

        NOTE: xlrd's Sheet API does not expose cell formulas, so with a
        plain xlrd workbook this returns an empty list. Formulas are only
        reported for sheet objects that provide a ``cell_formula(row, col)``
        accessor.

        Args:
            xls_path: Path of the ``.xls`` file to inspect.

        Returns:
            A list of dicts with the keys ``sheet``, ``cell`` (A1-style
            reference), ``formula`` and ``value``.
        """
        # Always open the requested file instead of reusing a workbook that
        # an earlier call may have loaded from a different path.
        self.workbook = xlrd.open_workbook(xls_path, formatting_info=True)

        formulas = []

        for sheet_idx in range(self.workbook.nsheets):
            sheet = self.workbook.sheet_by_index(sheet_idx)

            formula_lookup = getattr(sheet, 'cell_formula', None)
            if formula_lookup is None:
                # No formula accessor on this sheet: nothing to extract.
                continue

            for row in range(sheet.nrows):
                for col in range(sheet.ncols):
                    formula = formula_lookup(row, col)
                    if formula:
                        formulas.append({
                            'sheet': sheet.name,
                            'cell': xlrd.cellname(row, col),
                            'formula': formula,
                            'value': sheet.cell_value(row, col)
                        })

        return formulas

# Create XLS files with xlwt
class XLSCreator:
    def __init__(self):
        """Create an empty xlwt workbook and the shared cell styles."""
        self.workbook = xlwt.Workbook()
        # Named styles built by _create_styles(), looked up by key when writing.
        self.styles = self._create_styles()
    
    def _create_styles(self):
        """Build the named cell styles used when writing sheets.

        Returns:
            A dict mapping ``'header'``, ``'date'``, ``'currency'`` and
            ``'percent'`` to xlwt ``XFStyle`` objects.
        """
        def number_format_style(format_string):
            # A style that only sets the number format.
            style = xlwt.XFStyle()
            style.num_format_str = format_string
            return style

        # Header: bold white text on a solid dark-blue background.
        header = xlwt.XFStyle()

        bold_white = xlwt.Font()
        bold_white.bold = True
        bold_white.colour_index = xlwt.Style.colour_map['white']
        header.font = bold_white

        blue_fill = xlwt.Pattern()
        blue_fill.pattern = xlwt.Pattern.SOLID_PATTERN
        blue_fill.pattern_fore_colour = xlwt.Style.colour_map['dark_blue']
        header.pattern = blue_fill

        return {
            'header': header,
            'date': number_format_style('YYYY-MM-DD'),
            'currency': number_format_style('$#,##0.00'),
            'percent': number_format_style('0.00%'),
        }
    
    def create_sample_spreadsheet(self, filename):
        """Create a sample XLS file with various data types.

        Writes a 'Sample Data' sheet (header row plus five records) and a
        'Formulas' sheet whose formulas summarize the Amount column of the
        data sheet, then saves the workbook.

        Args:
            filename: Path of the ``.xls`` file to write.
        """
        data_sheet_name = 'Sample Data'
        sheet = self.workbook.add_sheet(data_sheet_name)

        # Headers
        headers = ['ID', 'Name', 'Date', 'Amount', 'Percentage', 'Active']
        for col, header in enumerate(headers):
            sheet.write(0, col, header, self.styles['header'])

        # Sample data
        sample_data = [
            [1, 'John Doe', date(2023, 1, 15), 1250.50, 0.125, True],
            [2, 'Jane Smith', date(2023, 2, 20), 2100.75, 0.089, True],
            [3, 'Bob Johnson', date(2023, 3, 10), 875.25, 0.156, False],
            [4, 'Alice Brown', date(2023, 4, 5), 3200.00, 0.234, True],
            [5, 'Charlie Wilson', date(2023, 5, 12), 1890.30, 0.098, True]
        ]

        # Style per column; None means "write with the default style".
        column_styles = [
            None,                      # ID
            None,                      # Name
            self.styles['date'],       # Date
            self.styles['currency'],   # Amount
            self.styles['percent'],    # Percentage
            None,                      # Active
        ]

        for row, record in enumerate(sample_data, 1):
            for col, (value, style) in enumerate(zip(record, column_styles)):
                if style is None:
                    sheet.write(row, col, value)
                else:
                    sheet.write(row, col, value, style)

        # Add formulas. The range must be qualified with the data sheet's
        # name; an unqualified A2:A6 would point at the label cells of the
        # Formulas sheet itself.
        last_row = len(sample_data) + 1
        amount_range = f"'{data_sheet_name}'!D2:D{last_row}"

        formula_sheet = self.workbook.add_sheet('Formulas')
        summaries = [('Sum:', 'SUM'), ('Average:', 'AVERAGE'), ('Count:', 'COUNT')]
        for row, (label, function) in enumerate(summaries):
            formula_sheet.write(row, 0, label, self.styles['header'])
            formula_sheet.write(row, 1, xlwt.Formula(f"{function}({amount_range})"))

        # Save workbook
        self.workbook.save(filename)
        print(f"Sample XLS file created: {filename}")
    
    def create_from_data(self, data, filename, sheet_name='Data'):
        """Create XLS file from Python data."""
        sheet = self.workbook.add_sheet(sheet_name)
        
        if not data:
            return
        
        # Write headers if data is list of dictionaries
        if isinstance(data[0], dict):
            headers = list(data[0].keys())
            for col, header in enumerate(headers):
                sheet.write(0, col, header, self.styles['header'])
            
            # Write data
            for row, record in enumerate(data, 1):
                for col, header in enumerate(headers):
                    value = record.get(header, '')
                    
                    # Apply appropriate style based on data type
                    if isinstance(value, datetime):
                        sheet.write(row, col, value, self.styles['date'])
                    elif isinstance(value, (int, float)) and col in [3, 4]:  # Assuming currency columns
                        sheet.write(row, col, value, self.styles['currency'])
                    else:
                        sheet.write(row, col, value)
        
        # Write data if it's a list of lists
        elif isinstance(data[0], (list, tuple)):
            for row, record in enumerate(data):
                for col, value in enumerate(record):
                    sheet.write(row, col, value)
        
        self.workbook.save(filename)
        print(f"XLS file created from data: {filename}")

# Usage examples
def analyze_xls_file(filepath):
    """Print a summary of an XLS file: workbook facts, then one section per sheet.

    Nothing is printed by this function when the file could not be read.
    """
    info = XLSProcessor().read_xls_file(filepath)
    if not info:
        return

    sheets = info['sheets']
    print(f"File: {info['filename']}")
    print(f"Sheets: {len(sheets)}")
    print(f"Has Macros: {info['has_macros']}")
    print(f"Codepage: {info['codepage']}")
    print("-" * 50)

    for sheet in sheets:
        print(f"\nSheet: {sheet['name']}")
        print(f"  Dimensions: {sheet['rows']} rows × {sheet['cols']} columns")
        print(f"  Cells with data: {sheet['cells_with_data']}")
        print(f"  Formulas: {sheet['formulas']}")
        print(f"  Data types: {sheet['data_types']}")

        samples = sheet['sample_data']
        if not samples:
            continue

        print(f"  Sample data (first few rows):")
        # Only the first three rows and first five columns are shown.
        for number, values in enumerate(samples[:3], start=1):
            print(f"    Row {number}: {values[:5]}...")

def batch_convert_xls_to_csv(directory):
    """Convert all XLS files in a directory to CSV.

    Each ``.xls`` file (case-insensitive extension) is converted with its
    own ``XLSProcessor`` so that no workbook state leaks from one file to
    the next. A failure on one file is printed and does not stop the batch.

    Args:
        directory: Directory to scan (not recursive).
    """
    # Sorted so files are processed in a predictable order.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith('.xls'):
            continue

        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            # A directory that merely ends in ".xls" is not a workbook.
            continue

        print(f"\nProcessing: {filename}")

        try:
            csv_files = XLSProcessor().convert_to_csv(filepath)
            print(f"Created {len(csv_files)} CSV files")
        except Exception as e:
            print(f"Error converting {filename}: {e}")

# Example usage
if __name__ == "__main__":
    sample_path = 'sample.xls'

    # Build a sample workbook, then run it through every reader feature.
    XLSCreator().create_sample_spreadsheet(sample_path)
    analyze_xls_file(sample_path)

    # CSV conversion and formula extraction share one processor.
    processor = XLSProcessor()
    processor.convert_to_csv(sample_path)

    found = processor.extract_formulas(sample_path)
    if found:
        print("\nFormulas found:")
        for entry in found:
            print(f"  {entry['sheet']}!{entry['cell']}: {entry['formula']}")

Java XLS Processing with Apache POI

import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellReference;

import java.io.*;
import java.util.*;

public class XLSProcessor {
    
    /** Workbook-level summary returned by analyzeXLSFile. */
    public static class XLSInfo {
        // File name without its directory part.
        public String filename;
        // Number of sheets in the workbook.
        public int numberOfSheets;
        // One entry per sheet, in workbook order.
        public List<SheetInfo> sheets = new ArrayList<>();
        // Whether the workbook was reported to contain macros.
        public boolean hasMacros;
        
        /** Per-sheet statistics filled in by analyzeSheet. */
        public static class SheetInfo {
            // Sheet name as stored in the workbook.
            public String name;
            // Last row index + 1.
            public int rows;
            // Largest "last cell number" seen across the sheet's rows.
            public int columns;
            // Count of cells whose type is not BLANK.
            public int cellsWithData;
            // Count of FORMULA cells.
            public int formulas;
            // Cell counts keyed by cell type name (STRING, NUMERIC, ...).
            public Map<String, Integer> dataTypes = new HashMap<>();
        }
    }
    
    /**
     * Opens an XLS file and summarizes the workbook and each of its sheets.
     *
     * @param filepath path of the .xls file to analyze
     * @return the collected workbook and per-sheet statistics
     * @throws IOException if the file cannot be opened or parsed
     */
    public static XLSInfo analyzeXLSFile(String filepath) throws IOException {
        XLSInfo info = new XLSInfo();
        info.filename = new File(filepath).getName();
        
        // The compound-document file system is opened explicitly so its root
        // directory can be inspected. HSSFWorkbook offers no containsMacros();
        // Excel keeps the VBA project of an .xls file in the
        // "_VBA_PROJECT_CUR" storage, so its presence is used instead.
        try (FileInputStream fis = new FileInputStream(filepath);
             org.apache.poi.poifs.filesystem.POIFSFileSystem fs =
                     new org.apache.poi.poifs.filesystem.POIFSFileSystem(fis);
             HSSFWorkbook workbook = new HSSFWorkbook(fs)) {
            
            info.numberOfSheets = workbook.getNumberOfSheets();
            info.hasMacros = fs.getRoot().hasEntry("_VBA_PROJECT_CUR");
            
            for (int i = 0; i < info.numberOfSheets; i++) {
                HSSFSheet sheet = workbook.getSheetAt(i);
                info.sheets.add(analyzeSheet(sheet));
            }
        }
        
        return info;
    }
    
    /**
     * Counts the cells of one sheet by type and records its dimensions.
     *
     * @param sheet the sheet to scan
     * @return statistics for the sheet
     */
    private static XLSInfo.SheetInfo analyzeSheet(HSSFSheet sheet) {
        XLSInfo.SheetInfo stats = new XLSInfo.SheetInfo();
        stats.name = sheet.getSheetName();
        stats.rows = sheet.getLastRowNum() + 1;
        
        // Every counter starts at zero so absent types still show up in the map.
        String[] typeKeys = {"STRING", "NUMERIC", "BOOLEAN", "FORMULA", "ERROR", "BLANK"};
        for (String key : typeKeys) {
            stats.dataTypes.put(key, 0);
        }
        
        int widestRow = 0;
        
        for (Row row : sheet) {
            widestRow = Math.max(widestRow, row.getLastCellNum());
            
            for (Cell cell : row) {
                CellType type = cell.getCellType();
                
                if (type != CellType.BLANK) {
                    stats.cellsWithData++;
                }
                
                switch (type) {
                    case FORMULA:
                        stats.formulas++;
                        stats.dataTypes.merge("FORMULA", 1, Integer::sum);
                        break;
                    case STRING:
                    case NUMERIC:
                    case BOOLEAN:
                    case ERROR:
                    case BLANK:
                        // The enum constant name is the same text as the map key.
                        stats.dataTypes.merge(type.name(), 1, Integer::sum);
                        break;
                    default:
                        break;
                }
            }
        }
        
        stats.columns = widestRow;
        return stats;
    }
    
    /**
     * Writes every sheet of an XLS file to its own UTF-8 CSV file named
     * {@code <xls base name>_<sheet name>.csv} inside {@code outputDir}.
     *
     * @param xlsPath   path of the .xls file to convert
     * @param outputDir directory that receives the CSV files
     * @throws IOException if the workbook cannot be read or a CSV cannot be written
     */
    public static void convertXLSToCSV(String xlsPath, String outputDir) throws IOException {
        try (FileInputStream fis = new FileInputStream(xlsPath);
             HSSFWorkbook workbook = new HSSFWorkbook(fis)) {
            
            String baseName = new File(xlsPath).getName().replaceFirst("[.][^.]+$", "");
            
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                HSSFSheet sheet = workbook.getSheetAt(i);
                // Path separators are not valid inside a file name.
                String safeSheetName = sheet.getSheetName().replace('/', '_').replace('\\', '_');
                String csvFileName = baseName + "_" + safeSheetName + ".csv";
                String csvPath = new File(outputDir, csvFileName).getPath();
                
                // Explicit UTF-8 instead of the platform default charset, and a
                // BufferedWriter so write failures surface as IOException
                // (PrintWriter swallows them).
                try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(csvPath),
                        java.nio.charset.StandardCharsets.UTF_8))) {
                    
                    // Walk row indexes instead of iterating the sheet: the
                    // iterator skips rows that were never created, which would
                    // shift every following CSV line upwards.
                    int lastRow = sheet.getPhysicalNumberOfRows() == 0 ? -1 : sheet.getLastRowNum();
                    
                    for (int r = 0; r <= lastRow; r++) {
                        Row row = sheet.getRow(r);
                        List<String> values = new ArrayList<>();
                        
                        if (row != null) {
                            for (int j = 0; j < row.getLastCellNum(); j++) {
                                Cell cell = row.getCell(j);
                                values.add(escapeCsvValue(getCellValueAsString(cell)));
                            }
                        }
                        
                        writer.write(String.join(",", values));
                        writer.newLine();
                    }
                }
                
                System.out.println("Converted sheet '" + sheet.getSheetName() + 
                                 "' to " + csvPath);
            }
        }
    }
    
    /**
     * Returns the text to write for a cell.
     *
     * Formula cells are rendered from their cached result, using the same
     * rules as a plain cell of that result type.
     *
     * @param cell the cell to render, may be {@code null}
     * @return the cell's value as text; empty for null or blank cells
     */
    private static String getCellValueAsString(Cell cell) {
        if (cell == null) {
            return "";
        }
        
        CellType type = cell.getCellType();
        if (type == CellType.FORMULA) {
            // Ask for the cached result type rather than guessing via
            // exceptions: a boolean or error result would otherwise make
            // getStringCellValue() throw IllegalStateException.
            type = cell.getCachedFormulaResultType();
        }
        
        switch (type) {
            case STRING:
                return cell.getStringCellValue();
            case NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue().toString();
                }
                return String.valueOf(cell.getNumericCellValue());
            case BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case ERROR:
                return "ERROR:" + cell.getErrorCellValue();
            case BLANK:
            default:
                return "";
        }
    }
    
    /**
     * Quotes a value for CSV output when it needs quoting.
     *
     * @param value the raw field text; {@code null} is treated as empty
     * @return the value unchanged, or wrapped in double quotes with embedded
     *         quotes doubled when it contains a comma, quote, or line break
     */
    private static String escapeCsvValue(String value) {
        if (value == null) {
            return "";
        }
        
        // A bare carriage return breaks a CSV record just like a line feed.
        boolean needsQuoting = value.contains(",") || value.contains("\"")
                || value.contains("\n") || value.contains("\r");
        
        if (needsQuoting) {
            return "\"" + value.replace("\"", "\"\"") + "\"";
        }
        return value;
    }
    
    /**
     * Creates a sample workbook with a styled data sheet and a formula sheet
     * and writes it to disk.
     *
     * @param filename path of the .xls file to write
     * @throws IOException if the file cannot be written
     */
    public static void createSampleXLS(String filename) throws IOException {
        try (HSSFWorkbook workbook = new HSSFWorkbook()) {
            HSSFSheet dataSheet = workbook.createSheet("Sample Data");
            
            // Header cells: bold font on a solid light-blue fill
            HSSFCellStyle headerStyle = workbook.createCellStyle();
            HSSFFont boldFont = workbook.createFont();
            boldFont.setBold(true);
            headerStyle.setFont(boldFont);
            headerStyle.setFillForegroundColor(IndexedColors.LIGHT_BLUE.getIndex());
            headerStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
            
            // Number formats for the date and amount columns
            HSSFCellStyle dateStyle = workbook.createCellStyle();
            dateStyle.setDataFormat(workbook.createDataFormat().getFormat("yyyy-mm-dd"));
            
            HSSFCellStyle currencyStyle = workbook.createCellStyle();
            currencyStyle.setDataFormat(workbook.createDataFormat().getFormat("$#,##0.00"));
            
            // Header row
            String[] headers = {"ID", "Name", "Date", "Amount", "Active"};
            Row headerRow = dataSheet.createRow(0);
            
            int headerCol = 0;
            for (String title : headers) {
                Cell headerCell = headerRow.createCell(headerCol++);
                headerCell.setCellValue(title);
                headerCell.setCellStyle(headerStyle);
            }
            
            // Sample records, one array per row in header order
            Object[][] records = {
                {1, "John Doe", new Date(), 1250.50, true},
                {2, "Jane Smith", new Date(), 2100.75, true},
                {3, "Bob Johnson", new Date(), 875.25, false},
                {4, "Alice Brown", new Date(), 3200.00, true}
            };
            
            int rowIndex = 1;
            for (Object[] record : records) {
                Row row = dataSheet.createRow(rowIndex++);
                
                for (int col = 0; col < record.length; col++) {
                    Cell cell = row.createCell(col);
                    Object value = record[col];
                    
                    // Pick the setCellValue overload that matches the runtime type.
                    if (value instanceof String) {
                        cell.setCellValue((String) value);
                    } else if (value instanceof Integer) {
                        cell.setCellValue((Integer) value);
                    } else if (value instanceof Double) {
                        cell.setCellValue((Double) value);
                        boolean isAmountColumn = (col == 3);
                        if (isAmountColumn) {
                            cell.setCellStyle(currencyStyle);
                        }
                    } else if (value instanceof Date) {
                        cell.setCellValue((Date) value);
                        cell.setCellStyle(dateStyle);
                    } else if (value instanceof Boolean) {
                        cell.setCellValue((Boolean) value);
                    }
                }
            }
            
            // Formula sheet: a label in column A, the formula in column B
            HSSFSheet formulaSheet = workbook.createSheet("Formulas");
            String[][] summaries = {
                {"Sum:", "SUM('Sample Data'!D2:D5)"},
                {"Average:", "AVERAGE('Sample Data'!D2:D5)"}
            };
            
            for (int i = 0; i < summaries.length; i++) {
                Row summaryRow = formulaSheet.createRow(i);
                summaryRow.createCell(0).setCellValue(summaries[i][0]);
                summaryRow.createCell(1).setCellFormula(summaries[i][1]);
            }
            
            // Fit each data column to its content
            for (int col = 0; col < headers.length; col++) {
                dataSheet.autoSizeColumn(col);
            }
            
            // Write to file
            try (FileOutputStream out = new FileOutputStream(filename)) {
                workbook.write(out);
            }
            
            System.out.println("Sample XLS file created: " + filename);
        }
    }
    
    /**
     * Demo entry point: creates sample.xls, prints its analysis and converts
     * it to CSV in the current directory. I/O failures are printed as a
     * stack trace.
     */
    public static void main(String[] args) {
        final String samplePath = "sample.xls";
        
        try {
            createSampleXLS(samplePath);
            
            XLSInfo report = analyzeXLSFile(samplePath);
            
            System.out.println("\nFile Analysis:");
            System.out.println("Filename: " + report.filename);
            System.out.println("Number of sheets: " + report.numberOfSheets);
            System.out.println("Has macros: " + report.hasMacros);
            
            for (XLSInfo.SheetInfo sheetStats : report.sheets) {
                System.out.println("\nSheet: " + sheetStats.name);
                System.out.println("  Rows: " + sheetStats.rows);
                System.out.println("  Columns: " + sheetStats.columns);
                System.out.println("  Cells with data: " + sheetStats.cellsWithData);
                System.out.println("  Formulas: " + sheetStats.formulas);
                System.out.println("  Data types: " + sheetStats.dataTypes);
            }
            
            convertXLSToCSV(samplePath, ".");
            
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}