|
对于常用的ANSI、Unicode、UTF-8、GB2312等编码的txt文件,本代码仅提供当前目录下的单个txt文件的读取,先判断txt文件的编码,再根据编码采用不同的读取方法,避免出现乱码的情况。(附件中的txt文件是UTF-8的,大家可以分别另存为别的编码进行测试。)
- Sub test()
-     ' Reads the first *.txt file found in this workbook's folder and writes its
-     ' tab-separated contents to Sheet1, starting at A1.
-     ' The file's encoding is detected with GetFileCode: files with a recognised
-     ' BOM are decoded through ByteToStr, everything else is read as ANSI text.
-     Dim rowIdx As Long, colIdx As Long, colCount As Long   ' Long: files may exceed 32767 lines
-     Dim folderPath As String, txtName As String, Filename As String
-     Dim fileNum As Integer
-     Dim s, st, fields(), arrByte() As Byte, FileCode$, arr()
-
-     ' Build the folder path with an explicit separator; without it the search
-     ' pattern becomes "<folder>*.txt" and never matches files inside the folder.
-     folderPath = ThisWorkbook.Path & Application.PathSeparator
-     txtName = Dir(folderPath & "*.txt")           ' first txt file in the folder, "" if none
-     If Len(txtName) = 0 Then
-         MsgBox "当前目录下没有txt文件!": Exit Sub  ' no txt file: nothing to do
-     End If
-     Filename = folderPath & txtName
-
-     FileCode = GetFileCode(Filename)              ' encoding detected from the BOM
-     fileNum = FreeFile                            ' do not assume #1 is available
-     If FileCode <> "ANSI or Other" Then
-         ' BOM-marked file: read the raw bytes and decode with the detected charset.
-         Open Filename For Binary Access Read As #fileNum
-         ReDim arrByte(LOF(fileNum) - 1)
-         Get #fileNum, , arrByte
-         Close #fileNum
-         s = Split(ByteToStr(arrByte, FileCode), vbNewLine)
-     Else
-         ' No BOM: read the bytes and convert from the system ANSI code page.
-         Open Filename For Input As #fileNum
-         s = Split(StrConv(InputB(LOF(fileNum), fileNum), vbUnicode), vbCrLf)
-         Close #fileNum
-     End If
-     If UBound(s) < 0 Then MsgBox "txt文件中没有数据!": Exit Sub  ' empty file
-
-     ' First pass: split every line once and find the widest row, so the output
-     ' array can be sized exactly (no fixed column cap, no shrinking mid-loop).
-     ReDim fields(0 To UBound(s))
-     colCount = 1
-     For rowIdx = 0 To UBound(s)
-         st = Split(s(rowIdx), vbTab)
-         fields(rowIdx) = st
-         If UBound(st) + 1 > colCount Then colCount = UBound(st) + 1
-     Next
-
-     ' Second pass: copy the fields into a 1-based 2-D array for the worksheet.
-     ReDim arr(1 To UBound(s) + 1, 1 To colCount)
-     For rowIdx = 0 To UBound(s)
-         st = fields(rowIdx)
-         For colIdx = 0 To UBound(st)
-             arr(rowIdx + 1, colIdx + 1) = st(colIdx)
-         Next
-     Next
-
-     ' Screen updating is only suspended around the worksheet write, so the
-     ' early exits above never leave it switched off.
-     Application.ScreenUpdating = False
-     With Sheets("Sheet1")
-         .UsedRange.ClearContents
-         .[A1].Resize(UBound(arr), UBound(arr, 2)) = arr
-     End With
-     Application.ScreenUpdating = True
-     MsgBox "txt文件读取完成!", vbInformation, "温馨提示"
- End Sub
- Function ByteToStr(arrByte, strCharset As String) As String
-     ' Decodes a byte array into a string using the given character set.
-     '   arrByte    - the raw bytes to decode
-     '   strCharset - charset name assigned to the ADODB stream (e.g. "UTF-8")
-     ' Returns the text read back from the stream.
-     Const STREAM_BINARY As Long = 1
-     Const STREAM_TEXT As Long = 2
-     Dim stm As Object
-
-     Set stm = CreateObject("Adodb.Stream")
-     ' Load the bytes in binary mode ...
-     stm.Type = STREAM_BINARY
-     stm.Open
-     stm.Write arrByte
-     ' ... then rewind and re-read the same buffer as text in the requested charset.
-     stm.Position = 0
-     stm.Type = STREAM_TEXT
-     stm.Charset = strCharset
-     ByteToStr = stm.Readtext
-     stm.Close
-     Set stm = Nothing
- End Function
- Function GetFileCode(ByVal FilePath As String)
-     ' Detects a text file's encoding from its byte-order mark (BOM).
-     '   FilePath - full path of the file to inspect
-     ' Returns a charset name that can be passed to an ADODB stream:
-     '   "Unicode"       - FF FE    (UTF-16 little-endian)
-     '   "unicodeFFFE"   - FE FF    (UTF-16 big-endian)
-     '   "UTF-8"         - EF BB BF
-     '   "ANSI or Other" - no recognised BOM
-     Dim intFile As Integer
-     Dim arrTmp(2) As Byte   ' first three bytes; stays 0 where the file is shorter
-
-     intFile = FreeFile
-     Open FilePath For Binary Access Read As #intFile
-     Get #intFile, 1, arrTmp
-     Close #intFile
-
-     If arrTmp(0) = 255 And arrTmp(1) = 254 Then
-         GetFileCode = "Unicode"
-     ElseIf arrTmp(0) = 254 And arrTmp(1) = 255 Then
-         ' Big-endian needs its own charset name; decoding it as "Unicode"
-         ' (little-endian) would swap every byte pair and produce garbage.
-         GetFileCode = "unicodeFFFE"
-     ElseIf arrTmp(0) = 239 And arrTmp(1) = 187 And arrTmp(2) = 191 Then
-         ' The UTF-8 BOM is three bytes long, so all three are verified.
-         GetFileCode = "UTF-8"
-     Else
-         GetFileCode = "ANSI or Other"
-     End If
- End Function
复制代码
|
评分
-
8
查看全部评分
-
|