Skip to content

Commit

Permalink
🐛 fix: improve url rules (#6669)
Browse files: browse the repository at this point in the history
* improve code

* improve code
  • Loading branch information
arvinxx authored Mar 3, 2025
1 parent 1a0330c commit 5ee59e3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
2 changes: 1 addition & 1 deletion packages/web-crawler/src/crawImpl/jina.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { CrawlImpl } from '../type';

export const jina: CrawlImpl<{ apiKey?: string }> = async (url, params) => {
- const token = params.apiKey ?? process.env.JINA_API_KEY;
+ const token = params.apiKey ?? process.env.JINA_READER_API_KEY ?? process.env.JINA_API_KEY;

try {
const res = await fetch(`https://r.jina.ai/${url}`, {
Expand Down
8 changes: 7 additions & 1 deletion packages/web-crawler/src/urlRules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ export const crawUrlRules: CrawlUrlRule[] = [
urlPattern: 'https://medium.com/(.*)',
urlTransform: 'https://scribe.rip/$1',
},

{
filterOptions: {
enableReadability: false,
},
impls: ['jina', 'browserless'],
urlPattern: 'https://(twitter.com|x.com)/(.*)',
},
// Rules for sports data websites
{
filterOptions: {
Expand Down

0 comments on commit 5ee59e3

Please sign in to comment.