QuangW
Thành viên mới
- Tham gia
- 1/12/23
- Bài viết
- 21
- Được thích
- 11
Chỉ cần như code sau: Em lọc được dữ liệu mình cần rồi anh ơi. Nhiều bước quá chứ không gọn như của anh.
Anh chỉ em lặp để lấy dữ liệu từ nhiều file với ạ.
// Reads table "Table001" from one PDF report and reshapes it into a single row:
// each title found in the report becomes a column, and the "Actual" reading that
// follows it becomes that column's value.
let
    PdfPath = "D:\MyPham\DOWNLOAD\Documents\Datapdf\DATA _doc\1.pdf",

    // Columns converted to numbers at the end. The leading space in
    // " Y Value_Point6" is reproduced exactly as the query originally spelled it.
    PointColumns = {"Y Value_Point5", " Y Value_Point6", "X Value_Point7", "X Value_Point8"},

    PdfTables = Pdf.Tables(File.Contents(PdfPath), [Implementation = "1.3"]),

    // Drop the first 7 rows so that the next row can serve as the header row.
    BodyRows = Table.Skip(PdfTables{[Id = "Table001"]}[Data], 7),
    WithHeaders = Table.PromoteHeaders(BodyRows),

    // Rows whose Column1 is null carry a title in [Actual]; copy that title down
    // onto the rows beneath it.
    TitleMarked = Table.AddColumn(
        WithHeaders,
        "Title",
        each if [Column1] = null then [Actual] else null
    ),
    TitleFilled = Table.FillDown(TitleMarked, {"Title"}),

    // Rows whose Column1 is "[image]" carry the reading we want in [Actual].
    ReadingMarked = Table.AddColumn(
        TitleFilled,
        "Actual2",
        each if [Column1] = "[image]" then [Actual] else null
    ),

    TitleAndReading = Table.SelectColumns(ReadingMarked, {"Title", "Actual2"}),
    CompletePairs = Table.SelectRows(
        TitleAndReading,
        each not ([Actual2] = null or [Title] = null)
    ),

    // Remove every "-" from the titles before they become column names.
    DashesRemoved = Table.ReplaceValue(CompletePairs, "-", "", Replacer.ReplaceText, {"Title"}),

    // Turn the (Title, Actual2) pairs into one row with the titles as headers.
    OneRow = Table.PromoteHeaders(Table.Transpose(DashesRemoved)),

    Typed = Table.TransformColumnTypes(
        OneRow,
        List.Transform(PointColumns, each {_, type number})
    )
in
    Typed
// Combines the readings of every PDF report found under FolderName into one table:
// one row per report, numbered by STT, with the four point values typed as numbers.
let
    FolderName = "D:\MyPham\DOWNLOAD\Documents\Datapdf\DATA _doc",

    // Columns expected from each report. They are listed explicitly instead of being
    // read from the first file, so an empty folder or a malformed first file no longer
    // breaks the whole query. The leading space in " Y Value_Point6" is kept exactly
    // as the original query spelled it.
    PointColumns = {"Y Value_Point5", " Y Value_Point6", "X Value_Point7", "X Value_Point8"},

    // Reshapes the binary content of one PDF into a single-row table whose column
    // names are the titles found in the report and whose values are the readings.
    ExtractPoints = (pdfContent as binary) as table =>
        let
            // First item of kind "Table" detected in the PDF.
            Source1 = Table.SelectRows(
                Pdf.Tables(pdfContent, [Implementation = "1.3"]),
                each ([Kind] = "Table")
            )[Data]{0},
            // Drop the first 7 rows so that the next row can serve as the header row.
            Data1 = Table.Skip(Source1, 7),
            Headers = Table.PromoteHeaders(Data1),
            // Rows whose Column1 is null carry a title in [Actual]; fill it downwards.
            AddTitle = Table.AddColumn(Headers, "Title", each if [Column1] = null then [Actual] else null),
            Fill = Table.FillDown(AddTitle, {"Title"}),
            // Rows whose Column1 is "[image]" carry the reading in [Actual].
            AddActual = Table.AddColumn(Fill, "Actual2", each if [Column1] = "[image]" then [Actual] else null),
            ChooseColumns = Table.SelectColumns(AddActual, {"Title", "Actual2"}),
            Filter = Table.SelectRows(ChooseColumns, each ([Actual2] <> null) and ([Title] <> null)),
            // Remove every "-" from the titles before they become column names.
            RemoveDashes = Table.ReplaceValue(Filter, "-", "", Replacer.ReplaceText, {"Title"}),
            Transpose = Table.PromoteHeaders(Table.Transpose(RemoveDashes))
        in
            Transpose,

    // Folder.Files lists files of every type and also descends into subfolders.
    // Keep only PDFs, and read each file through its own Content column rather than
    // rebuilding the path as FolderName & "\" & Name, which is wrong for any file
    // that lives in a subfolder.
    PdfFiles = Table.SelectRows(Folder.Files(FolderName), each Text.Lower([Extension]) = ".pdf"),
    WithPoints = Table.AddColumn(PdfFiles, "Column1", each ExtractPoints([Content])),
    ToTable = Table.SelectColumns(WithPoints, {"Column1"}),

    // A report that lacks the expected columns expands to nulls and is removed by Filter2.
    Ketqua = Table.ExpandTableColumn(ToTable, "Column1", PointColumns),
    Filter2 = Table.SelectRows(Ketqua, each ([Y Value_Point5] <> null)),
    AddIndex = Table.AddIndexColumn(Filter2, "STT", 1, 1, Int64.Type),
    Reorder = Table.ReorderColumns(AddIndex, {"STT"} & PointColumns),
    // NOTE(review): no culture is passed, so text-to-number conversion follows the
    // workbook locale; confirm this matches the decimal separator used in the PDFs.
    Types = Table.TransformColumnTypes(Reorder, List.Transform(PointColumns, each {_, type number}))
in
    Types
Em cảm ơn anh ạ! Cho vào vòng lặp. File pdf số 2 bị sai cấu trúc, không giống những file khác nên bị rỗng
// Power Query M (the forum code box labelled this snippet "JavaScript").
// Combines the readings of every PDF report found under FolderName into one table:
// one row per report, numbered by STT, with the four point values typed as numbers.
let
    FolderName = "D:\MyPham\DOWNLOAD\Documents\Datapdf\DATA _doc",

    // Columns expected from each report. They are listed explicitly instead of being
    // read from the first file, so an empty folder or a malformed first file no longer
    // breaks the whole query. The leading space in " Y Value_Point6" is kept exactly
    // as the original query spelled it.
    PointColumns = {"Y Value_Point5", " Y Value_Point6", "X Value_Point7", "X Value_Point8"},

    // Reshapes the binary content of one PDF into a single-row table whose column
    // names are the titles found in the report and whose values are the readings.
    ExtractPoints = (pdfContent as binary) as table =>
        let
            // First item of kind "Table" detected in the PDF.
            Source1 = Table.SelectRows(
                Pdf.Tables(pdfContent, [Implementation = "1.3"]),
                each ([Kind] = "Table")
            )[Data]{0},
            // Drop the first 7 rows so that the next row can serve as the header row.
            Data1 = Table.Skip(Source1, 7),
            Headers = Table.PromoteHeaders(Data1),
            // Rows whose Column1 is null carry a title in [Actual]; fill it downwards.
            AddTitle = Table.AddColumn(Headers, "Title", each if [Column1] = null then [Actual] else null),
            Fill = Table.FillDown(AddTitle, {"Title"}),
            // Rows whose Column1 is "[image]" carry the reading in [Actual].
            AddActual = Table.AddColumn(Fill, "Actual2", each if [Column1] = "[image]" then [Actual] else null),
            ChooseColumns = Table.SelectColumns(AddActual, {"Title", "Actual2"}),
            Filter = Table.SelectRows(ChooseColumns, each ([Actual2] <> null) and ([Title] <> null)),
            // Remove every "-" from the titles before they become column names.
            RemoveDashes = Table.ReplaceValue(Filter, "-", "", Replacer.ReplaceText, {"Title"}),
            Transpose = Table.PromoteHeaders(Table.Transpose(RemoveDashes))
        in
            Transpose,

    // Folder.Files lists files of every type and also descends into subfolders.
    // Keep only PDFs, and read each file through its own Content column rather than
    // rebuilding the path as FolderName & "\" & Name, which is wrong for any file
    // that lives in a subfolder.
    PdfFiles = Table.SelectRows(Folder.Files(FolderName), each Text.Lower([Extension]) = ".pdf"),
    WithPoints = Table.AddColumn(PdfFiles, "Column1", each ExtractPoints([Content])),
    ToTable = Table.SelectColumns(WithPoints, {"Column1"}),

    // A report that lacks the expected columns expands to nulls and is removed by Filter2.
    Ketqua = Table.ExpandTableColumn(ToTable, "Column1", PointColumns),
    Filter2 = Table.SelectRows(Ketqua, each ([Y Value_Point5] <> null)),
    AddIndex = Table.AddIndexColumn(Filter2, "STT", 1, 1, Int64.Type),
    Reorder = Table.ReorderColumns(AddIndex, {"STT"} & PointColumns),
    // NOTE(review): no culture is passed, so text-to-number conversion follows the
    // workbook locale; confirm this matches the decimal separator used in the PDFs.
    Types = Table.TransformColumnTypes(Reorder, List.Transform(PointColumns, each {_, type number}))
in
    Types
View attachment 297543