Skip to content

📁 Python 文件操作

文件操作概述

文件操作是编程中非常重要的功能,Python 提供了丰富的文件操作功能,包括文件的读取、写入、创建、删除等操作。

💡 文件操作的重要性:

- 数据持久化:将程序数据保存到文件中
- 配置管理:读取和写入配置文件
- 日志记录:记录程序运行信息
- 数据处理:处理各种格式的数据文件

📖 文件读取

基本文件读取

python
# Read a whole file in one call
def read_entire_file(filename):
    """Return the complete text of *filename*, decoded as UTF-8.

    Returns:
        The file contents as a string, or ``None`` if the file is missing or
        any other error occurs (a message is printed in either case).
    """
    try:
        with open(filename, 'r', encoding='utf-8') as source:
            text = source.read()
    except FileNotFoundError:
        print(f"文件 {filename} 不存在")
    except Exception as error:
        print(f"读取文件时发生错误: {error}")
    else:
        return text
    return None

# Read a file into a list of lines
def read_file_line_by_line(filename):
    """Return every line of *filename* (UTF-8) as a list of strings.

    Line endings are kept on each element. Returns ``None`` and prints a
    message when the file does not exist; other errors propagate.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as source:
            return list(source)
    except FileNotFoundError:
        print(f"文件 {filename} 不存在")
    return None

# Stream a file line by line (memory friendly)
def read_file_iterator(filename):
    """Print each line of *filename* with its 1-based line number.

    The file object is iterated lazily, so only one line is held at a time.
    Surrounding whitespace is stripped from each line before printing. A
    message is printed if the file does not exist.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as source:
            stripped_lines = map(str.strip, source)
            for number, text in enumerate(stripped_lines, start=1):
                print(f"第{number}行: {text}")
    except FileNotFoundError:
        print(f"文件 {filename} 不存在")

# Demo: read a file and print its contents
content = read_entire_file("example.txt")
# read_entire_file returns None on failure, while an empty file yields "".
# Compare against None so that an empty file is still reported as read.
if content is not None:
    print("文件内容:")
    print(content)

不同编码的文件读取

python
def read_file_with_encoding(filename, encoding='utf-8'):
    """Read a text file, falling back to other encodings on decode errors.

    The file is first decoded with *encoding*. If that raises
    ``UnicodeDecodeError``, the fallbacks ``gbk``, ``gb2312`` and ``latin-1``
    are tried in order, skipping the encoding that has already failed.

    Args:
        filename: Path of the file to read.
        encoding: Encoding to try first.

    Returns:
        The decoded text, or ``None`` if no encoding works or any other
        error (missing file, unknown encoding name, I/O failure) occurs.
        A message is printed for every failure and for a successful fallback.
    """
    fallback_encodings = ('gbk', 'gb2312', 'latin-1')

    try:
        with open(filename, 'r', encoding=encoding) as file:
            return file.read()
    except UnicodeDecodeError:
        print(f"无法使用 {encoding} 编码读取文件")
    except Exception as e:
        print(f"读取文件时发生错误: {e}")
        return None

    # The retries run outside the handler above so that an I/O error during a
    # retry is reported the same way as one during the first attempt.
    for enc in fallback_encodings:
        if enc == encoding:
            continue  # already known to fail
        try:
            with open(filename, 'r', encoding=enc) as file:
                content = file.read()
        except UnicodeDecodeError:
            continue
        except Exception as e:
            print(f"读取文件时发生错误: {e}")
            return None
        print(f"成功使用 {enc} 编码读取文件")
        return content
    return None

# Demo: read a file, asking for UTF-8 as the first encoding to try
content = read_file_with_encoding("chinese.txt", "utf-8")

✍️ 文件写入

基本文件写入

python
# Write a text file
def write_text_file(filename, content):
    """Write *content* to *filename* as UTF-8, replacing any existing file.

    Returns:
        ``True`` on success, ``False`` if any error occurs. A status message
        is printed in both cases.
    """
    succeeded = False
    try:
        with open(filename, 'w', encoding='utf-8') as target:
            target.write(content)
        print(f"成功写入文件: {filename}")
        succeeded = True
    except Exception as error:
        print(f"写入文件时发生错误: {error}")
    return succeeded

# Append text to a file
def append_to_file(filename, content):
    """Append *content* to the end of *filename* (UTF-8).

    The file is opened in ``'a'`` mode, so it is created if it does not exist.

    Returns:
        ``True`` on success, ``False`` if any error occurs. A status message
        is printed in both cases.
    """
    succeeded = False
    try:
        with open(filename, 'a', encoding='utf-8') as target:
            target.write(content)
        print(f"成功追加内容到文件: {filename}")
        succeeded = True
    except Exception as error:
        print(f"追加文件时发生错误: {error}")
    return succeeded

# Write several lines at once
def write_multiple_lines(filename, lines):
    """Write each string in *lines* to *filename*, one per line (UTF-8).

    A newline is appended to every element; any existing file is replaced.

    Returns:
        ``True`` on success, ``False`` if any error occurs. A status message
        is printed in both cases.
    """
    succeeded = False
    try:
        with open(filename, 'w', encoding='utf-8') as target:
            target.writelines(text + '\n' for text in lines)
        print(f"成功写入 {len(lines)} 行到文件: {filename}")
        succeeded = True
    except Exception as error:
        print(f"写入多行文件时发生错误: {error}")
    return succeeded

# Demo: write three sample lines to output.txt
lines = [
    "这是第一行",
    "这是第二行",
    "这是第三行"
]
write_multiple_lines("output.txt", lines)

二进制文件操作

python
def copy_binary_file(source, destination):
    """Copy *source* to *destination* byte for byte, 4 KB at a time.

    Reading in fixed-size chunks keeps memory use constant regardless of the
    size of the file.

    Returns:
        ``True`` on success, ``False`` if any error occurs. A status message
        is printed in both cases.
    """
    block_size = 4096  # 4KB per read
    succeeded = False
    try:
        with open(source, 'rb') as src_file, open(destination, 'wb') as dst_file:
            # iter() with a sentinel stops at the first empty read (end of file).
            for chunk in iter(lambda: src_file.read(block_size), b''):
                dst_file.write(chunk)
        print(f"成功复制文件: {source} -> {destination}")
        succeeded = True
    except Exception as error:
        print(f"复制文件时发生错误: {error}")
    return succeeded

# Inspect the header and size of an image file
def get_image_info(filename):
    """Return basic information about a binary file.

    Returns:
        A dict with ``"filename"``, ``"size"`` (total bytes) and ``"header"``
        (the first 10 bytes as a hex string), or ``None`` if any error occurs
        (a message is printed).
    """
    try:
        with open(filename, 'rb') as image:
            # Seeking to the end (whence=2) returns the new offset, i.e. the size.
            total_bytes = image.seek(0, 2)
            image.seek(0)
            leading_bytes = image.read(10)
        return {
            "filename": filename,
            "size": total_bytes,
            "header": leading_bytes.hex()
        }
    except Exception as error:
        print(f"读取图片信息时发生错误: {error}")
        return None

📊 CSV 文件操作

读取 CSV 文件

python
import csv

def read_csv_file(filename):
    """Read a CSV file into a header row and a list of data rows.

    Args:
        filename: Path of a UTF-8 encoded CSV file.

    Returns:
        ``{"header": [...], "data": [[...], ...]}``. An empty file yields an
        empty header and no data rows. Returns ``None`` if any error occurs
        (a message is printed).
    """
    try:
        with open(filename, 'r', encoding='utf-8', newline='') as file:
            reader = csv.reader(file)
            # A default avoids StopIteration on an empty file, which would
            # otherwise be reported as an error with an empty message.
            header = next(reader, [])
            data = list(reader)

        return {"header": header, "data": data}
    except Exception as e:
        print(f"读取 CSV 文件时发生错误: {e}")
        return None

def read_csv_as_dict(filename):
    """Read a CSV file into a list of row mappings keyed by the header row.

    Returns:
        One mapping per data row, as produced by ``csv.DictReader``, or
        ``None`` if any error occurs (a message is printed).
    """
    try:
        with open(filename, 'r', encoding='utf-8', newline='') as source:
            return list(csv.DictReader(source))
    except Exception as error:
        print(f"读取 CSV 文件时发生错误: {error}")
        return None

# Demo: read a CSV file and show its header and leading rows
csv_data = read_csv_file("students.csv")
if csv_data:
    print("CSV 标题:", csv_data["header"])
    print("CSV 数据:", csv_data["data"][:3])  # show the first 3 rows

写入 CSV 文件

python
def write_csv_file(filename, data, headers=None):
    """Write rows to a UTF-8 CSV file, optionally preceded by a header row.

    Args:
        filename: Destination path; an existing file is replaced.
        data: Iterable of rows, each an iterable of field values.
        headers: Optional header row; skipped when falsy.

    Returns:
        ``True`` on success, ``False`` if any error occurs. A status message
        is printed in both cases.
    """
    succeeded = False
    try:
        with open(filename, 'w', encoding='utf-8', newline='') as target:
            csv_out = csv.writer(target)
            if headers:
                csv_out.writerow(headers)
            csv_out.writerows(data)

        print(f"成功写入 CSV 文件: {filename}")
        succeeded = True
    except Exception as error:
        print(f"写入 CSV 文件时发生错误: {error}")
    return succeeded

def write_csv_from_dict(filename, data, fieldnames):
    """Write a sequence of dicts to a UTF-8 CSV file with a header row.

    Args:
        filename: Destination path; an existing file is replaced.
        data: Iterable of dicts, one per row.
        fieldnames: Column names, in output order.

    Returns:
        ``True`` on success, ``False`` if any error occurs. A status message
        is printed in both cases.
    """
    succeeded = False
    try:
        with open(filename, 'w', encoding='utf-8', newline='') as target:
            dict_out = csv.DictWriter(target, fieldnames=fieldnames)
            dict_out.writeheader()
            for record in data:
                dict_out.writerow(record)

        print(f"成功写入 CSV 文件: {filename}")
        succeeded = True
    except Exception as error:
        print(f"写入 CSV 文件时发生错误: {error}")
    return succeeded

# Demo: write three sample student rows plus a header row
student_data = [
    ["张三", "20", "计算机科学"],
    ["李四", "21", "软件工程"],
    ["王五", "19", "数据科学"]
]
headers = ["姓名", "年龄", "专业"]
write_csv_file("students_output.csv", student_data, headers)

📄 JSON 文件操作

JSON 文件读写

python
import json

def read_json_file(filename):
    """Parse the UTF-8 JSON document stored in *filename*.

    Returns:
        The decoded Python object, or ``None`` when the file is missing, the
        content is not valid JSON, or any other error occurs. A message
        describing the failure is printed.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as source:
            parsed = json.loads(source.read())
    except FileNotFoundError:
        print(f"文件 {filename} 不存在")
    except json.JSONDecodeError as error:
        print(f"JSON 格式错误: {error}")
    except Exception as error:
        print(f"读取 JSON 文件时发生错误: {error}")
    else:
        return parsed
    return None

def write_json_file(filename, data, indent=2):
    """Serialize *data* as JSON into *filename* (UTF-8).

    Non-ASCII characters are written as-is rather than escaped.

    Args:
        filename: Destination path; an existing file is replaced.
        data: JSON-serializable object.
        indent: Indentation passed to ``json.dump``.

    Returns:
        ``True`` on success, ``False`` if any error occurs. A status message
        is printed in both cases.
    """
    succeeded = False
    try:
        with open(filename, 'w', encoding='utf-8') as target:
            json.dump(data, target, ensure_ascii=False, indent=indent)
        print(f"成功写入 JSON 文件: {filename}")
        succeeded = True
    except Exception as error:
        print(f"写入 JSON 文件时发生错误: {error}")
    return succeeded

# Demo: write a sample document to students.json, then read it back
student_info = {
    "students": [
        {"name": "张三", "age": 20, "major": "计算机科学"},
        {"name": "李四", "age": 21, "major": "软件工程"}
    ],
    "total": 2
}

write_json_file("students.json", student_info)
loaded_data = read_json_file("students.json")
if loaded_data:
    print("JSON 数据:", loaded_data)

📁 目录操作

目录遍历

python
import os

def list_directory_contents(path="."):
    """Split the entries of a directory into files and sub-directories.

    Args:
        path: Directory to inspect; defaults to the current directory.

    Returns:
        ``{"files": [...], "directories": [...]}`` holding entry names (not
        full paths). Entries that are neither are omitted. Returns ``None``
        if any error occurs (a message is printed).
    """
    try:
        file_names = []
        dir_names = []

        for entry in os.listdir(path):
            full_path = os.path.join(path, entry)
            if os.path.isfile(full_path):
                bucket = file_names
            elif os.path.isdir(full_path):
                bucket = dir_names
            else:
                continue
            bucket.append(entry)

        return {"files": file_names, "directories": dir_names}
    except Exception as error:
        print(f"列出目录内容时发生错误: {error}")
        return None

def walk_directory(path):
    """Print the directory tree rooted at *path*, indented by depth.

    Each directory is printed as ``name/`` and each of its files on the
    following lines, indented two further spaces per nesting level.

    Args:
        path: Root directory to walk. A trailing path separator is accepted.
    """
    try:
        for root, _dirs, files in os.walk(path):
            # Depth comes from the path of root relative to the start
            # directory. String replacement would strip every occurrence of
            # path and miscount when path ends with a separator.
            relative = os.path.relpath(root, path)
            level = 0 if relative == os.curdir else relative.count(os.sep) + 1
            indent = ' ' * 2 * level
            # normpath drops a trailing separator so basename is never empty
            # for an ordinary directory.
            dir_name = os.path.basename(os.path.normpath(root))
            print(f"{indent}{dir_name}/")

            sub_indent = ' ' * 2 * (level + 1)
            for file in files:
                print(f"{sub_indent}{file}")
    except Exception as e:
        print(f"遍历目录时发生错误: {e}")

# Demo: list the files and sub-directories of the current directory
contents = list_directory_contents(".")
if contents:
    print("文件:", contents["files"])
    print("目录:", contents["directories"])

文件信息获取

python
import os
from datetime import datetime

def get_file_info(filename):
    """Collect size, timestamps, type and permission bits for a path.

    Args:
        filename: Path of the file or directory to inspect.

    Returns:
        A dict with the keys ``filename``, ``size``, ``created``,
        ``modified``, ``accessed`` (``datetime`` objects), ``is_file``,
        ``is_directory`` and ``permissions`` (last three octal digits of the
        mode). Returns ``None`` if the path does not exist or any error
        occurs (a message is printed).
    """
    try:
        if not os.path.exists(filename):
            print(f"文件 {filename} 不存在")
            return None

        file_stat = os.stat(filename)

        # st_ctime is the metadata-change time on Unix, not the creation
        # time. Use the real creation time where the platform exposes it and
        # fall back to st_ctime otherwise.
        try:
            created_timestamp = file_stat.st_birthtime
        except AttributeError:
            created_timestamp = file_stat.st_ctime

        info = {
            "filename": filename,
            "size": file_stat.st_size,
            "created": datetime.fromtimestamp(created_timestamp),
            "modified": datetime.fromtimestamp(file_stat.st_mtime),
            "accessed": datetime.fromtimestamp(file_stat.st_atime),
            "is_file": os.path.isfile(filename),
            "is_directory": os.path.isdir(filename),
            "permissions": oct(file_stat.st_mode)[-3:]
        }

        return info
    except Exception as e:
        print(f"获取文件信息时发生错误: {e}")
        return None

# Demo: print every field reported for example.txt
file_info = get_file_info("example.txt")
if file_info:
    print("文件信息:")
    for key, value in file_info.items():
        print(f"  {key}: {value}")

🔧 文件操作工具类

文件管理器类

python
class FileManager:
    """Helper that performs file operations relative to a base directory.

    Every method resolves its file-name arguments against ``base_path`` with
    ``os.path.join``, prints a status message, and reports the outcome through
    its return value instead of raising.
    """

    def __init__(self, base_path="."):
        """Store the directory that file names are resolved against."""
        self.base_path = base_path

    def create_file(self, filename, content=""):
        """Create *filename* with *content* (UTF-8), replacing any existing file.

        Returns:
            ``True`` on success, ``False`` if any error occurs.
        """
        filepath = os.path.join(self.base_path, filename)
        try:
            with open(filepath, 'w', encoding='utf-8') as file:
                file.write(content)
            print(f"成功创建文件: {filepath}")
            return True
        except Exception as e:
            print(f"创建文件时发生错误: {e}")
            return False

    def delete_file(self, filename):
        """Delete *filename*.

        Returns:
            ``True`` if the file was removed, ``False`` if it does not exist
            or the removal fails.
        """
        filepath = os.path.join(self.base_path, filename)
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
                print(f"成功删除文件: {filepath}")
                return True
            else:
                print(f"文件不存在: {filepath}")
                return False
        except Exception as e:
            print(f"删除文件时发生错误: {e}")
            return False

    def copy_file(self, source, destination):
        """Copy the contents of *source* to *destination*.

        The copy is delegated to ``shutil.copyfile``, which streams the data
        instead of loading the whole file into memory. It also refuses to
        copy a file onto itself; opening the destination for writing first
        would truncate the source.

        Returns:
            ``True`` on success, ``False`` if any error occurs (including
            source and destination being the same file).
        """
        import shutil  # local import, like glob in search_files

        source_path = os.path.join(self.base_path, source)
        dest_path = os.path.join(self.base_path, destination)

        try:
            shutil.copyfile(source_path, dest_path)
            print(f"成功复制文件: {source} -> {destination}")
            return True
        except Exception as e:
            print(f"复制文件时发生错误: {e}")
            return False

    def move_file(self, source, destination):
        """Rename *source* to *destination* using ``os.rename``.

        Returns:
            ``True`` on success, ``False`` if any error occurs.
        """
        source_path = os.path.join(self.base_path, source)
        dest_path = os.path.join(self.base_path, destination)

        try:
            os.rename(source_path, dest_path)
            print(f"成功移动文件: {source} -> {destination}")
            return True
        except Exception as e:
            print(f"移动文件时发生错误: {e}")
            return False

    def search_files(self, pattern):
        """Return the paths under ``base_path`` that match the glob *pattern*.

        Returns:
            A list of matching paths (each includes ``base_path``), or an
            empty list if any error occurs.
        """
        import glob
        try:
            search_path = os.path.join(self.base_path, pattern)
            files = glob.glob(search_path)
            return files
        except Exception as e:
            print(f"搜索文件时发生错误: {e}")
            return []

# Demo: create a file, copy it, then list the .txt files in the base directory
fm = FileManager()
fm.create_file("test.txt", "这是一个测试文件")
fm.copy_file("test.txt", "test_copy.txt")
files = fm.search_files("*.txt")
print("找到的文件:", files)

🎯 实践练习

练习1:日志文件分析器

编写一个程序分析日志文件:

  • 统计不同级别的日志数量
  • 找出错误最多的时间段
  • 生成分析报告

练习2:配置文件管理器

创建一个配置文件管理器:

  • 支持 JSON、YAML、INI 格式
  • 读取和写入配置
  • 配置验证和默认值

练习3:文件备份工具

开发一个文件备份工具:

  • 增量备份
  • 压缩备份文件
  • 备份历史管理

练习4:数据转换工具

创建数据格式转换工具:

  • CSV 转 JSON
  • JSON 转 Excel
  • 支持批量转换

🔒 文件安全

文件权限检查

python
import stat

def check_file_permissions(filename):
    """Report the permission string and access rights of *filename*.

    Returns:
        A dict with ``filename``, ``permissions`` (the ``stat.filemode``
        string) and the booleans ``readable``, ``writable`` and
        ``executable`` from ``os.access``. Returns ``None`` if the path does
        not exist or any error occurs (a message is printed).
    """
    try:
        if os.path.exists(filename):
            mode_bits = os.stat(filename).st_mode
            return {
                "filename": filename,
                "permissions": stat.filemode(mode_bits),
                "readable": os.access(filename, os.R_OK),
                "writable": os.access(filename, os.W_OK),
                "executable": os.access(filename, os.X_OK)
            }

        print(f"文件 {filename} 不存在")
        return None
    except Exception as error:
        print(f"检查文件权限时发生错误: {error}")
        return None

# Demo: print the permission details reported for example.txt
permissions = check_file_permissions("example.txt")
if permissions:
    print("文件权限信息:")
    for key, value in permissions.items():
        print(f"  {key}: {value}")

安全文件操作

python
def safe_file_operation(filename, operation, *args, **kwargs):
    """Run *operation* on *filename* after basic path and size checks.

    The path is rejected if it is absolute or if any of its components is
    ``..``. An existing file larger than 100 MB is rejected as well.

    Args:
        filename: Relative path (``str`` or path-like) to operate on.
        operation: Callable invoked as ``operation(filename, *args, **kwargs)``.
        *args: Extra positional arguments forwarded to *operation*.
        **kwargs: Extra keyword arguments forwarded to *operation*.

    Returns:
        Whatever *operation* returns, or ``None`` if a check fails or
        *operation* raises (a message is printed).
    """
    max_size = 100 * 1024 * 1024  # 100MB

    try:
        # Path safety checks
        path_text = os.fspath(filename)
        # Compare whole components rather than substrings so that names such
        # as "notes..v2.txt" are allowed. Both separator styles are split so
        # a ".." component is caught however the path is written.
        components = path_text.replace("\\", "/").split("/")
        if (".." in components
                or path_text.startswith("/")
                or os.path.isabs(path_text)):
            raise ValueError("不安全的文件路径")

        # Size check (refuse to touch oversized files)
        if os.path.exists(filename):
            file_size = os.path.getsize(filename)
            if file_size > max_size:
                raise ValueError("文件过大,拒绝操作")

        # Run the requested operation
        return operation(filename, *args, **kwargs)

    except Exception as e:
        print(f"安全文件操作失败: {e}")
        return None

# Demo wrapper: read a file through the safety checks
def read_file_safe(filename):
    """Read *filename* with ``read_entire_file`` via ``safe_file_operation``."""
    outcome = safe_file_operation(filename, read_entire_file)
    return outcome

📊 性能优化

大文件处理

python
def process_large_file(filename, chunk_size=8192):
    """Yield the contents of *filename* as successive byte chunks.

    Args:
        filename: Path of the file, opened in binary mode.
        chunk_size: Maximum number of bytes per yielded chunk.

    Yields:
        ``bytes`` objects of at most *chunk_size* bytes until the file is
        exhausted. If an error occurs, a message is printed and iteration
        ends.
    """
    try:
        with open(filename, 'rb') as source:
            # iter() with a sentinel stops at the first empty read (end of file).
            for block in iter(lambda: source.read(chunk_size), b''):
                yield block
    except Exception as error:
        print(f"处理大文件时发生错误: {error}")

def count_lines_large_file(filename):
    """Count the lines of a UTF-8 text file without loading it into memory.

    Returns:
        The number of lines, or ``0`` if any error occurs (a message is
        printed).
    """
    try:
        with open(filename, 'r', encoding='utf-8') as source:
            total = sum(1 for _ in source)
        return total
    except Exception as error:
        print(f"统计行数时发生错误: {error}")
        return 0

# Demo: iterate over a large file chunk by chunk
for chunk in process_large_file("large_file.txt"):
    # handle each chunk here
    pass

📈 文件操作总结

文件操作模式

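| 模式 | 描述 | 用途 |
| --- | --- | --- |
| 'r' | 只读 | 读取文件内容 |
| 'w' | 写入 | 创建新文件或覆盖现有文件 |
| 'a' | 追加 | 在文件末尾添加内容 |
| 'x' | 创建 | 创建新文件,如果文件存在则失败 |
| 'r+' | 读写 | 读取和写入文件 |
| 'rb' | 二进制只读 | 读取二进制文件 |
| 'wb' | 二进制写入 | 写入二进制文件 |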

最佳实践

  1. 使用 with 语句:自动管理文件资源
  2. 指定编码:避免编码问题
  3. 异常处理:处理文件操作可能的错误
  4. 大文件分块:避免内存问题
  5. 路径安全:检查文件路径的安全性

下一步

现在你已经掌握了 Python 的文件操作,接下来学习:

  • 实践项目 - 综合应用所学知识
  • 深入学习其他 Python 库(如 pandas、numpy)

💡 学习建议:多练习不同格式的文件操作,掌握文件处理的最佳实践