适合单表头和多级表头,注意关键词判断

from openpyxl import load_workbook
from openpyxl.cell import MergedCell
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.merge import MergedCellRange

file = "工资表_双层表头.xlsx"
file="2022年1月工资表.xlsx"
file="工资表_单层表头.xlsx"
#path="/root/PycharmProjects1/python/excel/{}".format(file)
book = load_workbook(file)
ws = book.active


def get_header():
    # df = pd.read_excel(file, header=[0, 1], engine='openpyxl')
    # header = df.columns.to_list()
    # print(header)
    # return
    # df.loc[1].to_csv('test.csv')

    row1 = ws[1]
    row2 = ws[2]
    merged_cells = ws.merged_cells.ranges
    letters = []
    for i in range(1, ws.max_column + 1):
        letter = get_column_letter(i)
        letters.append(letter)
    print(letters)
    is_two_header=False
    for letter in letters:
        coord = '{}1:{}2'.format(letter, letter)
        if MergedCellRange(ws, coord) in merged_cells:
            print('是二级', coord)
            is_two_header=True
            break

    headers2 = []
    for i, e in enumerate(row1):

        # if isinstance(e,MergedCell) or isinstance(row2[i],MergedCell):
        if e.value is not None:
            headers2.append({'key': e.value, 'index': i})
        if is_two_header:
            if row2[i].value is not None:
                if 'child' in headers2[-1]:
                    headers2[-1]['child'].append({'key': row2[i].value, 'index': i})
                else:
                    headers2[-1]['child'] = [{'key': row2[i].value, 'index': i}]

    print(headers2)
    return headers2


#header = get_header()


def get_value(row):
    """
    get row value
    :param ws:
    :param row:
    :return:
    """
    value = []
    for i in header:
        if 'child' in i:
            for j in i['child']:
                v = row[j['index']].value
                value.append(v)
        else:
            v = row[i['index']].value
            value.append(v)
    print(value)
    # for i,e in enumerate(row):
    #     print(e.value)
def get_headers2():
    merged_cells = ws.merged_cells.ranges
    is_two_header = False
    name_cell_row_start = None
    name_cell_row_end=None
    row1=None
    for merge_cell in merged_cells:
        cell = ws.cell(merge_cell.min_row, merge_cell.min_col)
        #print(cell.value)
        if cell.value and isinstance(cell.value,str) and '姓名' in cell.value:
            is_two_header=True
            name_cell_row_start=merge_cell.min_row
            name_cell_row_end=merge_cell.max_row
            row1 = ws[name_cell_row_start]
            row2 = ws[name_cell_row_end]
            break
    if not is_two_header:
        for row in  ws.iter_rows(max_row=100):
            for cell in row:
                if cell.value and isinstance(cell.value,str) and '姓名' in cell.value:
                    row1 = ws[cell.row]
                    break
    if not row1:
        print('没有姓名')
        return 
    headers2 = []
    for i, e in enumerate(row1):

        # if isinstance(e,MergedCell) or isinstance(row2[i],MergedCell):
        if e.value is not None:
            headers2.append({'key': e.value, 'index': i})
        if is_two_header:
            if row2[i].value is not None:
                if 'child' in headers2[-1]:
                    headers2[-1]['child'].append({'key': row2[i].value, 'index': i})
                else:
                    headers2[-1]['child'] = [{'key': row2[i].value, 'index': i}]

    print(headers2)
    return headers2




get_headers2()

Logo

GitCode 天启AI是一款由 GitCode 团队打造的智能助手,基于先进的LLM(大语言模型)与多智能体 Agent 技术构建,致力于为用户提供高效、智能、多模态的创作与开发支持。它不仅支持自然语言对话,还具备处理文件、生成 PPT、撰写分析报告、开发 Web 应用等多项能力,真正做到“一句话,让 Al帮你完成复杂任务”。

更多推荐