+
    Fj7I                         R t ^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIH	t	 ^ RI
HtHt ^ RIHt ^ RIHt RtRRR	R
RR/t0 RmtRRRRRRRR/t0 RmtR tR tR t ! R R]4      tR# )aJ  
Fetch accurate per-POI images from the Openverse API.

For every place in the database it builds a targeted query from the place name
(plus city for disambiguation), searches Openverse, ranks the results by how well
their title/tags match the place name, and downloads only confident matches.
Images are saved under MEDIA_ROOT/poi/<city_slug>/ and linked via PlaceImage.

Usage:
    python manage.py fetch_poi_images                      # top 50 places per city
    python manage.py fetch_poi_images --city berlin --limit 100
    python manage.py fetch_poi_images --per-place 3 --compute-features
    python manage.py fetch_poi_images --client-id ID --client-secret SECRET
    python manage.py fetch_poi_images --dry-run            # preview matches only
    python manage.py fetch_poi_images --clear              # remove old images first
N)urlparse)ThreadPoolExecutoras_completed)BaseCommand)settingszhttps://api.openverse.org/v1z
User-AgentzuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36Acceptz!application/json, text/plain, */*zAccept-Languagezen-US,en;q=0.9londonzUnited KingdomberlinGermanynew_yorkzUnited StatesparisFrancec                 \    R P                  R \        P                  ! RV 4       4       4      # ) c              3   b   "   T F%  p\         P                  ! V4      '       d   K!  Vx  K'  	  R # 5iN)unicodedata	combining).0cs   & U/Users/jokubas/Desktop/engineering/processing/management/commands/fetch_poi_images.py	<genexpr> strip_accents.<locals>.<genexpr><   s'      6a$$Q' 	
6s   /
/NFKD)joinr   	normalize)texts   &r   strip_accentsr   ;   s-    77 ((6      c                     \        T ;'       g    R P                  4       4      p \        P                  ! RV 4       Uu. uF  p\	        V4      ^8  g   K  VNK  	  up# u upi )r   z
[^a-z0-9]+)r   lowerresplitlen)r   ts   & r   tokenizer%   B   sL    $**"++-.Dxxt4C4!A
AA4CCCs   AAc                    \        V 4       Uu. uF  q\        9  g   K  VNK  	  ppT;'       g    \        V 4      # u upi )z@Significant tokens of a place name, excluding generic stopwords.)r%   	STOPWORDS)namer$   tokss   &  r   name_tokensr*   G   s8    ~<~!));AA~D<!!8D>! =s   <<c                      a  ] tR t^Mt o RtR tR tR tR tR t	R t
R tR	 tRR
 ltRtRR lt]R 4       tR tRtV tR# )CommandzDFetch accurate per-POI images from Openverse and link them to placesc                   VP                  R \        RRR7       VP                  R\        ^2RR7       VP                  R\        ^RR7       VP                  R\        ^R	R7       VP                  R
\        RRR7       VP                  R\        \        P
                  P                  RR4      RR7       VP                  R\        \        P
                  P                  RR4      RR7       VP                  RRRR7       VP                  RRRR7       VP                  RRRR7       VP                  RRRR7       VP                  RRRR7       VP                  R \        R!R"R7       VP                  R#RR$R7       R# )%z--cityNzLimit to a single city slug)typedefaulthelpz--limitzKMax places to process per city (highest score first). Use 0 for ALL places.z	--workerszOConcurrent network workers. Higher = faster but more likely to hit rate limits.z--per-placez&Number of images to download per placez--min-matchg      ?zBMinimum fraction of name tokens that must appear in a result (0-1)z--client-idOPENVERSE_CLIENT_IDr   z8Openverse OAuth client id (optional, higher rate limits)z--client-secretOPENVERSE_CLIENT_SECRETz(Openverse OAuth client secret (optional)z--compute-features
store_truez;Compute KMeans colour feature vectors for downloaded images)actionr0   z--features-onlyu\   Skip all network work — only compute missing KMeans features for already-downloaded imagesz--clearzBDelete existing PlaceImage/ImageFeature for processed places firstz
--only-newz6Skip places that already have at least one image savedz	--dry-runz(Show matched results without downloadingz--sleepg        z0Seconds to wait between API searches (default 0)z--exclude-wikimediaz4Skip images hosted on wikimedia.org or wikipedia.org)add_argumentstrintfloatosenvironget)selfparsers   &&r   add_argumentsCommand.add_argumentsP   s   H3!> 	 	@IC!n 	 	pKc1!r 	 	tMQ!I 	 	KMs!e 	 	gMRZZ^^Lace=f![ 	 	]-CPikmAn!K 	 	M0!^ 	 	`-l! 	 	AIl!e 	 	gL!Y 	 	[K!K 	 	MIE3!S 	 	U1,!W 	 	Yr   c                   ^ RI HpHp \        P                  ! 4       V n        RV n        \        P                  V n	        VP                  P                  4       pVR,          '       dX   VP                  VR,          R7      pVP                  4       '       g)   V P                  P                  RVR,           R24       R# VR,          '       d   V P!                  WR4       R# VR,          '       d5   VR	,          '       d&   V P#                  VR,          VR	,          4      V n        \%        ^VR
,          4      p^ pV EFd  pVP                  P                  VR7      P'                  R4      p	VR,          '       d   VR,          ^ 8  d   V	RVR,           p	\)        V	4      p
VR,          '       dn   ^ RI Hp \-        VP                  P                  VR7      P/                  RRR7      P1                  4       4      pV
 Uu. uF  qP2                  V9  g   K  VNK  	  p
pV P4                  P                  V P6                  P9                  RVP:                   R\=        V
4       RV R24      4       \>        P@                  PC                  V P                  RVPD                  4      pVR,          '       g   \>        PF                  ! VRR7       ^ p\I        VR7      ;_uu_ 4       pV
 Uu/ uF"  pVPK                  V PL                  VWV4      VbK$  	  pp\O        V4       F|  pVV,          pVPQ                  4       pV^,          pWpPS                  VVV4      ,          pV^2,          ^ 8X  g   KN  V P4                  P                  RV R\=        V
4       RV R24       K~  	  RRR4       EKg  	  V P4                  P                  V P6                  PU                  R V R!24      4       R# u upi u upi   + '       g   i     EK  ; i)"    )CityPlaceNcity)slugzNo city with slug ''features_only	client_idclient_secretworkers)rD   z-interest_scorelimitonly_new)
PlaceImage)place__cityplace_idT)flat
=== z: processing z	 places (z workers) ===poidry_run)exist_ok)max_workers  .../z	 places, z images so farz
Done. Downloaded z images for places.)+places.modelsrB   rC   	threadinglocal_localtokenr   
MEDIA_ROOT
media_rootobjectsallfilterexistsstderrwrite_run_features_only
_get_tokenmaxorder_bylistrM   setvalues_listdistinctidstdoutstyleMIGRATE_HEADINGr(   r#   r9   pathr   rE   makedirsr   submit_plan_placer   result_commit_planSUCCESS)r<   argsoptrB   rC   citiesrJ   total_downloadedrD   qsplacesrM   already_donepout_dirdoneexplacefuturesfutplans   &*,                  r   handleCommand.handlen   sM   -oo'
"--!!#v;;]]F]4F==??!!$7F}A"FG##F0{O 4 4[)93;OPDJaY(D%%4%099:KLB7||Gq 0W&"XF:4"&&--$-? [$[7
  &,HVtt</G!!VHKKdjj88=V >9M+, - ggll4??E499EGy>>Gd3D $88B "(!' IId..tcJEQ!'   (0C#CLE::<DAID$(9(9%s(KK$byA~))E$qVY-=,>n+N O 1 983 N 	$**,,!"2!33FGI 	J7 I 988s1   ;OOO("(O#

AO(%2O(#O((O:c                   ^ RI HpHp ^ RIHp V EF  pVP
                  P                  VRR7      P                  VP
                  P                  R4      R7      P                  R4      pVP                  4       pV P                  P                  V P                  P                  RVP                   R	V R
24      4       ^ ;rVP!                  4        EF  p\"        P$                  P'                  V P(                  VP*                  4      pV	^,          p	\"        P$                  P-                  V4      '       gD   V P                  P                  V P                  P/                  RVP*                   24      4       K  V! V^
R7      pVfD   V P                  P                  V P                  P/                  RVP*                   24      4       K  VP
                  P1                  VP*                  RTRVP2                  '       d   VP2                  P5                  4       MRR\6        P8                  ! VP;                  4       4      RVP2                  /R7       V
^,          p
V	^2,          ^ 8X  g   EK  V P                  P                  RV	 RV RV
 R24       EK  	  V P                  P                  V P                  P=                  RV
 RV RVP                   24      4       EK  	  R# )zGCompute KMeans feature vectors for all downloaded images that lack one.rM   ImageFeaturecompute_image_featurezpoi/)rN   image_path__startswith
image_path)image_path__inr   rQ   z: computing features for z images ===z  [missing file] kNz  [feature failed] rD   category_namegeneralfeature_vectorr   defaultsrV   rW   z	 images, z features computedz  Done: z feature vectors computed for )rX   rM   r   processing.algorithms.kmeansr   r_   ra   excludevaluesselect_relatedcountrn   rd   ro   rp   r(   iteratorr9   rq   r   r^   r   rb   WARNINGupdate_or_creater   get_primary_categoryjsondumpstolistrw   )r<   rz   ry   rM   r   r   rD   imagestotalr   computedpiabs_pathvecs   &&&           r   re   Command._run_features_only   sP   :FD ((v$vvNwl.B.B.I.I,.WwX%~g. 
 LLNEKKdjj88#<UG;OQ R  Doo'77<<G	ww~~h//KK%%djj&8&8+BMM?;'= >+H;;KK%%djj&8&8-bmm_='? @$$55!}}'BHHH)F)F)HZc($**SZZ\*B	 6  A"9>KK%%dV1UG9XJN`&ab/ (2 KKdjj008*AeW,J499+VX YI r   c                    \        V P                  R R4      pVfE   \        P                  ! 4       pVP                  P                  \        4       WP                  n        V# )sessionN)getattrr[   requestsSessionheadersupdateBROWSER_HEADERSr   )r<   ss   & r   _sessionCommand._session   sH    DKKD19  "AII_-"#KKr   c           	          V P                  4       P                  \         R 2RRRVRV/^R7      pVP                  ^8X  dT   V P                  P                  V P                  P                  R4      4       VP                  4       P                  R4      # V P                  P                  RVP                   24       R
#   \        P                   d*   pT P                  P                  R	T 24        R
p?R
# R
p?ii ; i)z/auth_tokens/token/
grant_typeclient_credentialsrH   rI   )datatimeoutzOpenverse OAuth token acquiredaccess_tokenzToken request failed: zToken error: N)r   postAPI_BASEstatus_codern   rd   ro   rw   r   r;   rc   r   RequestException)r<   rH   rI   res   &&&  r   rf   Command._get_token   s    	3$$z1D%E2YM 	 % A
 }}#!!$**"4"45U"VWvvx||N33KK 6q}}oFG  (( 	3KKaS122	3s   BB? (B? ?C=C88C=c                   VP                   p\        P                  VP                  R4      pVP	                  4       pVR8X  d   RMTpRP                  R W5VP                   V3 4       4      RP                  R W2P                   V3 4       4      RP                  R W2P                   3 4       4      V.p\        4       . rV F7  p	V	'       g   K  W9  g   K  VP                  V	4       VP                  V	4       K9  	  V# )uG   Ordered, deduped query strings — richest (most disambiguating) first.r   rC    c              3   8   "   T F  q'       g   K  Vx  K  	  R # 5ir    r   r   s   & r   r   #Command._queries.<locals>.<genexpr>   s     E ?11QQ ?   	
c              3   8   "   T F  q'       g   K  Vx  K  	  R # 5ir   r   r   s   & r   r   r     s     @ :1aQQ :r   c              3   8   "   T F  q'       g   K  Vx  K  	  R # 5ir   r   r   s   & r   r   r     s     7 11QQQ 1r   )	r(   CITY_COUNTRYr;   rE   r   r   rj   addappend)
r<   r   rD   r(   countrycat
candidatesseenorderedqs
   &&&       r   _queriesCommand._queries   s    zz""499b1((*7Nb HHEDIIw ?EEHH@yy' :@@HH7yy 177	

 rgAqQ]q!  r   c                   \        VP                  4      pV P                  W4      p. pV F?  pV P                  V4      p	V P	                  WVR ,          VR,          4      pV'       g   K?   M	  V'       g   RRRV/# VR,          '       d   RRRV^ ,          /# . p
\        VRVR,           4       EFu  w  rV P                  VR	,          4      pVP                   R
V RV 2p\        P                  P                  W>4      p\        P                  P                  RVP                  V4      p\        VR	,          4      P                  ;'       g
    VR	,          pV P                  VR	,          V4      pV'       d,   V
P                  RRRVR	VR	,          RVR,          RV/4       K  RpVR,          '       d9   ^ RIHp V! V^
R7      pVe%   \$        P&                  ! VP)                  4       4      pV
P                  RVRVR,          R,          RVP+                  4       RVRV/4       EKx  	  VR,          '       d   \,        P.                  ! VR,          4       RRRV
/# )	min_matchexclude_wikimediastatusno_matchqueriesrS   bestN	per_placeurl_.rR   failedTreasontitledomaincompute_featuresr   r   rel_pathcaption:Ni  Ncategoryr   sleepok	downloads)r*   r(   r   _search_rank	enumerate_ext_from_urlrm   r9   rq   r   rE   r   netloc	_downloadr   r   r   r   r   r   r   timer   )r<   r   rD   r   ry   wantedr   rankedr   resultsr   iresextfnamer   r   r   errr   r   vs   &&&&&                 r   rt   Command._plan_place  s   UZZ( --,All1oGZZ[1A3GZC[\Fv	  j)W==y>>i;;	'8K(8 9:FA$$SZ0Cxxj!AcU+Eww||G3Hww||E499e<Hc%j)00>>CJF..UX6C  (D(CE
T[]`ah]iksu{!|}C%&&N)(b9=**QXXZ0CH3w<-E668 #& ' ;6 w<<JJs7|$$Y77r   c                   ^ RI HpHp VR,          R8X  dy   RP                  \        P                  VR,          4      4      pV P                  P                  V P                  P                  RVP                   24      RV 2,           4       ^ # VR,          R8X  dJ   VR	,          pV P                  P                  R
VR,          R RVP                   RVR,          : 24       ^ # VR,          '       dU   VP                  P                  VR7      P                  4        VP                  P                  VR7      P                  4        ^ pVR,           EFT  p	V	P                  RR4      p
V
'       d   VP                   RV
 R2MVP                  pV	P                  R4      '       dD   V P                  P                  V P                  P                  RV RV	R,           24      4       K  VP                  P                  V	R,          RVRV	R,          /R7       V	R,          eE   VP                  P                  V	R,          RVP                   R V	R!,          RV	R,          RV/R7       V P                  P                  V P                  P#                  R"V 24      4       V^,          pEKW  	  VR,          '       g)   V P                  P                  R#VP                   24       V# )$z6Write a worker's results to the DB (main thread only).r   r   r   z | r   z  [no match] z
    Tried: rS   r   z	  [match scorez.2fz] z  ->  r   clear)r   r   r   r   z  ()r   z  [download failed] r   r   r   r   r   r   rD   r   r   z
  [saved] z  [no images saved] )rX   rM   r   r   dictfromkeysrn   rd   ro   r   r(   r_   ra   deleter;   r   rD   rw   )r<   r   r   ry   rM   r   triedr   
downloadeddr   labels   &&&&        r   rv   Command._commit_plan?  s   :>Z'JJt}}T)_=>EKK

""]5::,#?@!%)* >Y&<DKKDM#.bF4=BSTVw<<  ''e'4;;=%%E%299;
k""AUU8R(F39uzzl#fXQ/uzzEuuX!!JJ&&)=eWC(}'UV //Z=!5)Qy\B 0  !".$$55 }

':(!,<*=	 6  KKdjj00:eW1EFG!OJ/ #2 K  KK 4UZZLABr   c                   / pV P                   '       d   R V P                    2VR&   ^2pRVRVRR/p V P                  4       P                  \         R2VV^R7      pVP                  R8X  d)   \
        P                  ! ^4       V P                  WR	7      # VP                  ^8w  d   . # VP                  4       P                  R
. 4      #   \        P                   d    . u # i ; i)zBearer Authorizationr   	page_sizematurefalsez/images/)r   paramsr   i  )r  r   )r\   r   r;   r   r   r   r   r   r   r   r   )r<   queryr  r   r  r   s   &&&   r   r   Command._searchr  s    :::)0'=GO$Iuk9hH
	##xj$97+12 $ ?A}}#

2||E|??}}#	668<<	2..(( 	I	s   A"C C (C C"!C"c                |  aa V'       g   . # . pV EF  pVP                  R4      pV'       g   K  V'       dz   \        V4      P                  P                  4       o\        ;QJ d)    V3R lV P
                   4       F  '       g   K   RM	  RM! V3R lV P
                   4       4      '       d   K  RP                  \        RVP                  RR4      RP                  R	 VP                  R
4      ;'       g    .  4       4      VP                  RR4      VP                  RR4      .4      4      p\        \        V4      4      o\        V3R lV 4       4      p	V	\        V4      ,          p
W8  g   EKb  TP                  RTRVP                  R4      ;'       g    RRV
/4       EK  	  VP                  R RR7       V# )z<Score each result by how many place-name tokens it contains.r   c              3   n   <"   T F*  pSV8H  ;'       g    SP                  R V,           4      x  K,  	  R# 5i)r   N)endswith)r   r  hosts   & r   r    Command._rank.<locals>.<genexpr>  s/     \D[qtqy::DMM#'$::D[s   5!5TFr   Nr   r   c              3   D   "   T F  qP                  R R4      x  K  	  R# 5i)r(   r   N)r;   )r   r$   s   & r   r   r    s     M5Kqvr**5Ks    tagscreatorsourcec              3   8   <"   T F  qS9   g   K  ^x  K  	  R# 5i)   Nr   )r   whtokss   & r   r   r    s     7&QJqq&s   
Untitledr   c                     V R ,          # )r   r   )xs   &r   <lambda>Command._rank.<locals>.<lambda>  s    !G*r   )keyreverse)r;   r   r   r    any_WIKIMEDIA_DOMAINSr   ra   rj   r%   sumr#   r   sort)r<   r   r   r   r   scoreditemr   haystackhitsr   r  r  s   &&&&&      @@r   r   Command._rank  sb   ID((5/C }++1133\DD[D[\333\DD[D[\\\xxt"%MTXXf5E5K5K5KMMB'2&	. ! H *+E7&77D3v;&E!3TXXg.<<*U % . 	,d;r   c                    V P                  R 4      ^ ,          P                  R^4      p\        V4      ^8X  d8   V^,          P                  4       \        9   d   V^,          P                  4       # R# )?r   jpg)r"   rsplitr#   r    	VALID_EXT)r   tails   & r   r   Command._ext_from_url  sP    yy~a ''Q/t9>d1gmmo:7==?"r   c                    V P                  4       P                  V^RR7      pVP                  ^8w  d   RVP                   2# \        VR4      ;_uu_ 4       pVP	                  RR7       F  pV'       g   K  VP                  V4       K   	  RRR4       \        P                  P                  V4      pV^ 8X  d   \        P                  ! V4       R# R#   + '       g   i     LP; i  \        P                  P                   d   pR	T 2u Rp?# Rp?i\        P                  P                   d     R
# \        P                  P                   d   pRT 2u Rp?# Rp?i\         d   pRT 2u Rp?# Rp?ii ; i)zKDownload url to abs_path. Returns None on success, error string on failure.T)r   streamzHTTP wbi    )
chunk_sizeNzempty file (0 bytes)zconnection error: ztimed out after 30szrequest error: zfile write error: )r   r;   r   openiter_contentrd   r9   rq   getsizeremover   
exceptionsConnectionErrorTimeoutr   OSError)r<   r   r   r   fchunksizer   s   &&&     r   r   Command._download  s)   	,##CD#AA}}#q}}o..h%%^^t^<Eu = & 77??8,Dqy		(#- &% ""22 	,'s++""** 	)(""33 	)$QC(( 	,'s++	,s   AC( C( C8CAC( C%	 C( (E;DE; E;2E;6E;E;EE;E;+E;,E60E;6E;)r[   r^   r\   N)   )zwikimedia.orgzwikipedia.org)F)__name__
__module____qualname____firstlineno__r0   r>   r   re   r   rf   r   rt   rv   r   r'  r   staticmethodr   r   __static_attributes____classdictcell__)__classdict__s   @r   r,   r,   M   sj     QDY<@JD*YX&(18f1f$ <>  , ,r   r,   >   gifr1  pngjpegwebp>   aanatdeelinlaleofontoanddasderdieforthevonparkhousechurchmuseumsquarestatuebuildingmemorialmonument)__doc__r9   r!   r   r   rY   r   r   urllib.parser   concurrent.futuresr   r   django.core.management.baser   django.confr   r   r   r3  r   r'   r   r%   r*   r,   r   r   r   <module>rs     s     
 	      ! ? 3   * 	= 1' 2	 iX		D
"u,k u,r   