拆分EXCEL文件工具

根据某一列的内容，将一个工作表拆分成多个工作表，并可分别保存为独立的EXCEL文件。
操作方法：将EXCEL文件拖至EXE文件上，根据提示操作。

链接:

https://www.lanzouj.com/b00za3j6j
工具二：应网友要求，增加一个将EXCEL文件里的多个工作表分别保存为单独工作簿的小工具：
https://www.lanzouj.com/b00za3j6j
其它工具链接：

【V2.3】批量合并EXCEL工作表小工具

https://bbs.songma.com/thread-1029677-1-1.html

拆分EXCEL文件工具

PYTHON源码：

import openpyxl
from openpyxl.utils import get_column_letter
import xlrd
import sys, os, collections
from pprint import pprint
from copy import copy


class splitExcel(object):
    """Split one worksheet into several worksheets, grouped by one column.

    Every data row of the chosen source worksheet is routed to a target
    worksheet named after the (sanitised) value found in the split column.
    The header rows are repeated on every target worksheet, and the cell
    formatting of the header rows and of the first data row is copied over.

    Typical use is ``se = splitExcel(path); se.make(); se.save()``.

    NOTE(review): cell values are read through xlrd while styles are read
    through openpyxl. xlrd 2.x no longer opens .xlsx files, so this class
    relies on an xlrd 1.x installation -- confirm the pinned version.
    """

    def __init__(self, sourceFile, titleLine=None, splitColumn=None):
        """Open *sourceFile* and prepare an empty target workbook.

        Args:
            sourceFile: path of the workbook to split.
            titleLine: 1-based number of the title row, or None to ask the
                user when ``make()`` runs.
            splitColumn: 1-based number of the column to split by, or None
                to ask the user when ``make()`` runs.
        """
        self.sourceFile = sourceFile
        self.sourceWorkbook = openpyxl.load_workbook(sourceFile)
        self.targetWorkbook = openpyxl.Workbook()
        # Drop the sheet a new Workbook starts with, so that only the
        # generated sheets end up in the result.
        self.targetWorkbook.remove(self.targetWorkbook.active)

        # Source worksheet (openpyxl object)
        self.sourceWorksheet = None
        # Number of rows in the source worksheet
        self.sourceWorkbookMaxRow = None
        # Number of columns in the source worksheet
        self.sourceWorkbookMaxColumn = None
        # Index of the source worksheet inside the workbook
        self.sourceWorksheetIndex = None
        # Title row number; 1-based as entered by the user, mind the
        # offset when indexing internally
        self.titleLine = titleLine
        # Column to split by; 1-based as entered by the user, mind the
        # offset when indexing internally
        self.splitColumn = splitColumn
        # Current row number in the source workbook
        self.sbCurrectLine = None
        # Current data row number of each sheet in the target workbook
        self.tbCurrectLines = {}
        # Header rows (every row up to and including the title row)
        self.header = []
        # Data rows per target sheet name, in order of first appearance
        self.data = collections.OrderedDict()
        # Column widths
        self.columnsWidth = collections.OrderedDict()
        # Formatting captured by readCellsStyle()
        self.formats = {}

        # xlrd sheets already opened, keyed by sheet index, so the file is
        # parsed once instead of once per step
        self._xlrdSheets = {}
        # Worksheet objects created in the target workbook, keyed by the
        # name that was requested for them
        self._targetSheets = {}

    @staticmethod
    def _askInt(prompt, default, low, high):
        """Ask until the user enters an integer within ``[low, high]``.

        An empty answer selects *default*. Raises ValueError when the range
        itself is empty, because no answer could ever be accepted.
        """
        if high < low:
            raise ValueError('没有可供选择的有效范围（%s - %s）' % (low, high))
        while True:
            text = input(prompt).strip()
            try:
                value = default if text == '' else int(text)
            except ValueError:
                value = None
            if value is not None and low <= value <= high:
                return value
            print('输入无效，请输入 %s 到 %s 之间的整数。' % (low, high))

    def _valueSheet(self):
        """Return the xlrd sheet for the selected worksheet, opening it once."""
        index = self.sourceWorksheetIndex
        if index not in self._xlrdSheets:
            wb = xlrd.open_workbook(self.sourceFile)
            self._xlrdSheets[index] = wb.sheet_by_index(index)
        return self._xlrdSheets[index]

    def _recordSheetSize(self):
        """Store the row/column counts of the selected sheet and return the sheet."""
        ws = self._valueSheet()
        self.sourceWorkbookMaxRow = ws.nrows
        self.sourceWorkbookMaxColumn = ws.ncols
        return ws

    def _defaultSavePath(self):
        """Return the source path with its file name prefixed by '拆分_'."""
        folder, name = os.path.split(self.sourceFile)
        # os.path.join leaves a bare file name relative; gluing with '/'
        # would point at the filesystem root when there is no folder part.
        return os.path.join(folder, '拆分_' + name)

    def readData(self):
        """Read all rows and sort them into ``self.header`` and ``self.data``.

        Rows whose 0-based index is below ``titleLine`` are header rows; every
        other row is appended to the list stored under its sheet name.
        """
        ws = self._valueSheet()
        keyIndex = self.splitColumn - 1
        for x in range(ws.nrows):
            rowValues = ws.row_values(x)
            if x < self.titleLine:
                self.header.append(rowValues)
            else:
                sheetName = self.clearSheetName(rowValues[keyIndex])
                self.data.setdefault(sheetName, []).append(rowValues)

    def selectSplitSheet(self):
        """Choose the worksheet to split, asking the user if there are several."""
        sheetNames = self.sourceWorkbook.sheetnames
        if len(sheetNames) == 1:
            self.sourceWorksheet = self.sourceWorkbook.active
            self.sourceWorksheetIndex = 0
            return

        print('在工作薄中找到以下工作表：')
        for index, sheetName in enumerate(sheetNames):
            print(index, sheetName)

        lastIndex = len(self.sourceWorkbook.worksheets) - 1
        n = self._askInt('请输入要拆分表的序号[0]: ', 0, 0, lastIndex)
        self.sourceWorksheet = self.sourceWorkbook.worksheets[n]
        self.sourceWorksheetIndex = n

    def selectSplitColumn(self):
        """List the cells of the title row and ask which column to split by."""
        # Also keeps the row/column counts available to the other steps
        ws = self._recordSheetSize()

        print('\n在工作表的标题行（第 %s 行）找到以下列: ' % self.titleLine)
        for y in range(1, ws.ncols + 1):
            print(y, ws.cell(self.titleLine - 1, y - 1).value)

        self.splitColumn = self._askInt('请输入拆分列号[2]: ', 2, 1, ws.ncols)

    def readCellsStyle(self):
        """Capture formatting of the header rows and of the first data row.

        The result is stored in ``self.formats`` as 1-based lookup tables:
        per-row ``heights``, per-column ``widths`` and per-cell ``styles``,
        ``fonts``, ``borders``, ``fills``, ``alignments``, ``number_formats``
        and ``protections``. Row ``titleLine + 1`` serves as the template for
        every data row when the formats are written back.
        """
        ws = self.sourceWorksheet
        maxColumn = self.sourceWorkbookMaxColumn
        # Spare slots beyond the measured size, as in the original table
        # layout, so indices slightly past it stay in range.
        spare = 11
        rowSlots = self.titleLine + spare
        columnSlots = maxColumn + spare

        def emptyGrid():
            return [[None] * columnSlots for _ in range(rowSlots)]

        styles = emptyGrid()
        fonts = emptyGrid()
        borders = emptyGrid()
        fills = emptyGrid()
        alignments = emptyGrid()
        number_formats = emptyGrid()
        protections = emptyGrid()
        heights = [None] * rowSlots
        widths = [None] * columnSlots

        # Widths belong to columns, not cells: read each one exactly once.
        # (Testing ``y not in widths`` compared a column index against the
        # stored width values and could skip a column by coincidence.)
        for y in range(1, maxColumn + 1):
            widths[y] = ws.column_dimensions[get_column_letter(y)].width

        for x in range(1, self.titleLine + 2):
            heights[x] = ws.row_dimensions[x].height
            for y in range(1, maxColumn + 1):
                cell = ws.cell(x, y)
                # The default style may be stored under its Chinese name;
                # presumably the target workbook only knows 'Normal'.
                styles[x][y] = cell.style.replace('常规', 'Normal')
                fonts[x][y] = copy(cell.font)
                borders[x][y] = copy(cell.border)
                fills[x][y] = copy(cell.fill)
                alignments[x][y] = copy(cell.alignment)
                number_formats[x][y] = cell.number_format
                protections[x][y] = copy(cell.protection)

        self.formats['heights'] = heights
        self.formats['styles'] = styles
        self.formats['fonts'] = fonts
        self.formats['borders'] = borders
        self.formats['fills'] = fills
        self.formats['alignments'] = alignments
        self.formats['number_formats'] = number_formats
        self.formats['protections'] = protections
        self.formats['widths'] = widths

    def writeFormatToNewWorkbook(self):
        """Apply the captured formatting to every generated worksheet.

        Header rows get the formatting of the matching source row; all data
        rows get the formatting of the first source data row.
        """
        formats = self.formats
        templateRow = self.titleLine + 1
        createdSheets = getattr(self, '_targetSheets', {})

        for sheetName in self.data:
            # Prefer the worksheet object remembered at creation time:
            # openpyxl may adjust a requested title (e.g. to keep titles
            # unique), in which case a lookup by name would fail.
            ws = createdSheets.get(sheetName)
            if ws is None:
                ws = self.targetWorkbook[sheetName]
            maxRow = ws.max_row
            maxColumn = ws.max_column

            for y in range(1, maxColumn + 1):
                width = formats['widths'][y]
                if width is not None and width > 0:
                    ws.column_dimensions[get_column_letter(y)].width = width

            for x in range(1, maxRow + 1):
                xx = x if x <= self.titleLine else templateRow
                height = formats['heights'][xx]
                if height is not None and height > 0:
                    ws.row_dimensions[x].height = height

                for y in range(1, maxColumn + 1):
                    cell = ws.cell(x, y)
                    try:
                        cell.style = formats['styles'][xx][y]
                    except ValueError:
                        # Presumably a named style the target workbook does
                        # not know; the individual attributes below still
                        # carry the visible formatting.
                        pass
                    cell.font = formats['fonts'][xx][y]
                    cell.border = formats['borders'][xx][y]
                    cell.fill = formats['fills'][xx][y]
                    cell.alignment = formats['alignments'][xx][y]
                    cell.number_format = formats['number_formats'][xx][y]
                    cell.protection = formats['protections'][xx][y]

    def writeDataToNewWorkbook(self):
        """Create one worksheet per group and fill in header and data rows."""
        for sheetName, rows in self.data.items():
            ws = self.targetWorkbook.create_sheet(sheetName)
            self._targetSheets[sheetName] = ws
            # Header rows: empty strings are skipped so no cell is created
            for x, headerRow in enumerate(self.header, start=1):
                for y, cellValue in enumerate(headerRow, start=1):
                    if cellValue != '':
                        ws.cell(x, y).value = cellValue
            # Data rows
            for row in rows:
                ws.append(row)

    def clearSheetName(self, name, replaceAs='-'):
        """Turn a cell value into a usable worksheet name.

        Args:
            name: raw value of the split column; any type is accepted and
                converted to text.
            replaceAs: text substituted for each forbidden character.

        Returns:
            The text with forbidden characters replaced and surrounding
            whitespace removed.
        """
        # xlrd reports every number as a float; show whole numbers without
        # a trailing '.0'.
        if isinstance(name, float) and name.is_integer():
            name = int(name)
        # openpyxl rejects sheet titles containing any of \ / ? * : [ ];
        # the full-width colon is replaced as well, as before.
        invalidChars = '\\:/?*[]：'
        table = str.maketrans({c: replaceAs for c in invalidChars})
        return str(name).translate(table).strip()

    def selectTitleLine(self):
        """Show a preview of the sheet and ask for the title row number."""
        # Also keeps the row/column counts available to the other steps
        ws = self._recordSheetSize()

        print('打印所拆分工作表前10行，前5列数据：')
        previewColumns = min(ws.ncols, 5)
        # Never preview more rows than the sheet actually has
        for x in range(min(ws.nrows, 10)):
            preview = [ws.cell(x, y).value for y in range(previewColumns)]
            print('第%s行：' % (x + 1), preview)

        self.titleLine = self._askInt('\n请输入标题行所在行号[2]：', 2, 1, ws.nrows)

    def make(self):
        """Run the whole split: select, read, then write data and formats.

        The title row and the split column are only asked for when they
        were not already passed to the constructor.
        """
        self.selectSplitSheet()
        if self.titleLine is None:
            self.selectTitleLine()
        if self.splitColumn is None:
            self.selectSplitColumn()
        # Needed by readCellsStyle() even when both prompts were skipped
        self._recordSheetSize()

        print('开始读取数据...')
        self.readData()
        print('开始读取格式...')
        self.readCellsStyle()
        print('开始写入数据至分表...')
        self.writeDataToNewWorkbook()
        print('开始写入格式至分表...')
        self.writeFormatToNewWorkbook()

    def save(self, filename=None):
        """Save the target workbook and close both workbooks.

        Args:
            filename: destination path; defaults to the source file name
                prefixed with '拆分_', in the source file's folder.

        Returns:
            The path the workbook was saved to.
        """
        if filename is None:
            filename = self._defaultSavePath()
        self.targetWorkbook.save(filename)
        self.sourceWorkbook.close()
        self.targetWorkbook.close()
        return filename


class saveWorksheetToWorkbook(object):
    """Save every worksheet of one workbook as its own .xlsx file."""

    def __init__(self, excelFile):
        """Remember the path of the workbook whose sheets will be exported."""
        self.excelFile = excelFile

    def saveTo(self, savePath=None, addNumToFilename=True):
        """Write one workbook per sheet into *savePath*.

        Args:
            savePath: destination folder; defaults to the workbook path
                without its extension. The folder is created when missing.
            addNumToFilename: prefix each file name with the 1-based
                position of the sheet.

        Returns:
            The folder the files were written to.
        """
        if savePath is None:
            savePath = os.path.splitext(self.excelFile)[0]
        # Create the folder for explicit paths too; an empty path means the
        # current directory and needs no creation.
        if savePath:
            os.makedirs(savePath, exist_ok=True)

        wb = openpyxl.load_workbook(self.excelFile)
        sheetNames = wb.sheetnames
        wb.close()

        for n, sheetName in enumerate(sheetNames, start=1):
            print('保存', n, sheetName)
            # Reload for every sheet: the other sheets are removed from the
            # loaded copy before saving, so each copy can be used only once.
            wb = openpyxl.load_workbook(self.excelFile)
            try:
                for ws in wb.worksheets:
                    if ws.title != sheetName:
                        wb.remove(ws)

                prefix = str(n) if addNumToFilename else ''
                filename = os.path.join(savePath, '%s%s.xlsx' % (prefix, sheetName))
                wb.save(filename)
            finally:
                # Release the workbook even when saving fails
                wb.close()

        return savePath


if __name__ == '__main__':
    # The workbook to split is taken from the first command-line argument
    # (this is what dragging a file onto the executable supplies).
    if len(sys.argv) < 2:
        print('用法：请将EXCEL文件拖至本程序上，或在命令行中传入文件路径。')
        # Keep the console window open long enough to read the message
        input('\n按回车键退出。')
        sys.exit(1)

    file = sys.argv[1]

    # Step 1: split the chosen worksheet into one sheet per group
    se = splitExcel(file)
    se.make()
    f = se.save()
    print('拆分汇总文件：', f)

    # Step 2: export every sheet of the result as a separate workbook
    saveTo = saveWorksheetToWorkbook(f)
    p = saveTo.saveTo()
    print('拆分表保存文件夹：', p)

    input('\n完成，按回车键退出。')

文章版权归作者所有，未经允许请勿转载。如内容涉嫌侵权，请在本页底部进入<联系我们>进行举报投诉!

THE END

知识分享

评论共46条

请登录后发表评论

登录注册

只看作者

- 星星护诺梦0
  原创，非常感谢分享，收藏了
  4个月前回复
  举报
- 故事熊0
  感谢分享，这个挺实用的
  4个月前回复
  举报
- 实体店吸金术0
  感谢分享，好厉害啊!~~~
  4个月前回复
  举报
- 冬冬的专属空间0
  感谢老师，第一次离开发者这么近，而且完美的解决了我想要解决的问题，提高工作效率了。都激动的不知道说什么了。
  4个月前回复
  举报
- 黄岐菡0
  感谢楼主分享
  4个月前回复
  举报
- 南洪珠0
  openpyxl 只能处理xlsx 遇到xls就报错能解决吗？
  4个月前回复
  举报
- 筱小明0
  感谢分享~~~~~~~~~~~~~
  4个月前回复
  举报
- 爱何妍妍0
  还能拆分表格啊.
  4个月前回复
  举报
- 星星闪耀blingbling0
  学到很多东西，大佬加油
  4个月前回复
  举报
- 月华笼露华0
  感谢分享，辛苦了啦！
  4个月前回复
  举报
- 留学生实习求职0
  非常感谢啊找了很久这个了
  4个月前回复
  举报
- 甜0
  谢谢楼主，好东西收藏了
  4个月前回复
  举报
- 会飞的鱼0
  虽不如某汇总大师好，但能感觉到楼主很用心！感谢分享啦！期待以后能分享到某汇总大师！！！
  4个月前回复
  举报
- 墨玉寒天0
  谢谢楼主分享这个可以省很多时间
  4个月前回复
  举报
- 乐比悠悠0
  感谢分享谢谢
  4个月前回复
  举报
- ici00
  感谢分享，学习一下
  4个月前回复
  举报
- -鬃毛-0
  做excel挺有用的，谢谢
  4个月前回复
  举报
- 天又变凉了0
  电子表格编辑的软件是时候该掌握了。
  4个月前回复
  举报
- 打歌打歌0
  正需要，谢谢了！
  4个月前回复
  举报
- 向阳0
  设置为可以手动选文件夹才是正理。
  4个月前回复
  举报