LINQPad 代码
// Reference: https://playwright.dev/dotnet/docs/actionability
// Opens the pictogrammers.com MDI library page in Chromium, repeatedly scrolls the icon grid,
// collects the <title> text of every grid item, and writes the distinct titles (in the order
// they were first seen) to "icons.txt" on the current user's Desktop.
//
// args: forwarded unchanged to CreateApp.
async System.Threading.Tasks.Task Main(string[] args)
{
    // Dispose the host when Main exits so the services it owns (including logging) are released.
    using var app = CreateApp(args);
    var logger = GetLogger(app);
    // First-time setup: uncomment the next line to install the Playwright browsers.
    // Microsoft.Playwright.Program.Main(new string[] { "install" });
    using var playwright = await Microsoft.Playwright.Playwright.CreateAsync();
    var proxy = new Microsoft.Playwright.Proxy() { Server = "http://192.168.0.90:17890" };
    await using var browser = await playwright.Chromium.LaunchAsync(new() { Headless = false, Proxy = proxy });
    var page = await browser.NewPageAsync();
    var icons = new List<string>();
    // Every title collected so far. Kept for the whole run (not just the previous pass) so that
    // items still present in the DOM after a scroll are not appended to the output a second time.
    var seen = new HashSet<string>();
    try
    {
        await page.GotoAsync("https://pictogrammers.com/library/mdi/");
        // await page.ScreenshotAsync(new() { Path = "screenshot.png" });
        await page.WaitForSelectorAsync(@"//div[@class='virtuoso-grid-item']");
        var trytimes = 0;
        while (true)
        {
            var elements = await page.QuerySelectorAllAsync(@"//div[@class='virtuoso-grid-item']/a");
            // Dismiss the "Do Not Track" button whenever it is present.
            var button = await page.QuerySelectorAsync(@"//button[.='Do Not Track']");
            if (button != null)
            {
                await button.ClickAsync();
            }

            var foundNew = false;
            foreach (var element in elements)
            {
                // Parse the anchor's inner HTML and read the text of its <title> node.
                var innerHtml = await element.InnerHTMLAsync();
                HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
                htmlDocument.LoadHtml(innerHtml);
                var xpathNavigator = htmlDocument.CreateNavigator();
                var titleNode = xpathNavigator.SelectSingleNode(".//title");
                var title = titleNode?.Value;
                if (string.IsNullOrWhiteSpace(title))
                {
                    continue;
                }
                // HashSet.Add returns false for a title that was already collected.
                if (seen.Add(title))
                {
                    icons.Add(title);
                    foundNew = true;
                }
            }

            // Nothing new in this pass (and something was collected earlier): the page may simply
            // not have rendered the next items yet, so re-query a few times before giving up.
            if (!foundNew && seen.Count > 0)
            {
                trytimes++;
                if (trytimes < 3)
                {
                    logger.LogInformation("没有新元素了, 重试");
                    await System.Threading.Tasks.Task.Delay(TimeSpan.FromSeconds(1));
                    continue;
                }
                logger.LogInformation("没有新元素了, 退出");
                break;
            }
            trytimes = 0;

            var lastElement = elements.LastOrDefault();
            if (lastElement == null)
            {
                logger.LogInformation("没有元素了");
                break;
            }
            // Focus the last item currently in the DOM; presumably this scrolls the grid so that
            // further items get rendered (TODO confirm), then give the page a moment to update.
            await lastElement.FocusAsync();
            await System.Threading.Tasks.Task.Delay(TimeSpan.FromSeconds(0.1));
        }

        // Persist the collected titles.
        logger.LogInformation("保存文件");
        var desktop = System.Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
        var path = Path.Combine(desktop, "icons.txt");
        await File.WriteAllLinesAsync(path, icons);
        logger.LogInformation("处理完成");
    }
    finally
    {
        await page.CloseAsync();
    }
}
// Resolves the ILogger<UserQuery> registered in the given host's service provider
// and returns it as a plain ILogger.
Microsoft.Extensions.Logging.ILogger GetLogger(Microsoft.Extensions.Hosting.IHost app)
    => app.Services.GetRequiredService<Microsoft.Extensions.Logging.ILogger<UserQuery>>();
// Builds the application host: configures the global Serilog logger to write to the
// LINQPad output, then registers Serilog as the only logging provider of the host.
//
// args: passed through to Host.CreateApplicationBuilder.
Microsoft.Extensions.Hosting.IHost CreateApp(string[] args)
{
    // The static Serilog logger is assigned before the host is built; AddSerilog below is
    // called without an explicit logger instance.
    var serilogConfiguration = new Serilog.LoggerConfiguration();
    Serilog.Log.Logger = serilogConfiguration
        .WriteTo.LINQPad(outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {Message}{NewLine}{Exception}")
        .CreateLogger();

    var hostBuilder = Microsoft.Extensions.Hosting.Host.CreateApplicationBuilder(args);
    hostBuilder.Services.AddLogging(logging =>
    {
        // Remove the providers registered by default so only Serilog remains.
        logging.ClearProviders();
        logging.AddSerilog(dispose: true);
    });

    return hostBuilder.Build();
}