a
    'g5                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZmZmZ d dl	Z	d dl
mZ G dd dZeedd	d
ZedkrdZeeZede  dS )    N)Path)datetime)DictListOptional)SECReportCleanerc                   @   sZ   e Zd Zdd ZedddZedddZeeedd	d
ZeedddZ	dd Z
dS )SECHumanReadableParserc                 C   s<   || _ tj|}tj|}tj|d|dd| _d S )Nhuman_readablez.txtz_human_readable.txt)	file_pathospathdirnamebasenamejoinreplaceoutput_path)selfr
   Zbase_dir	file_name r   B/var/www/html/inwestownie/raporty/src/sec_human_readable_parser.py__init__   s    
zSECHumanReadableParser.__init__)returnc                 C   s<   t | jddd}| W  d    S 1 s.0    Y  d S )Nrutf-8encoding)openr
   read)r   fr   r   r   
_read_file   s    z!SECHumanReadableParser._read_filec                 C   s   t d|  t j}|si S |d}| |d| |d| |d| |d| |d| |d| |d	| |d
ddS )u$   Wyciąga informacje z nagłówka SECz<SEC-HEADER>(.*?)</SEC-HEADER>   zCOMPANY CONFORMED NAME:\s*(.+)zFILED AS OF DATE:\s*(.+)z!CONFORMED SUBMISSION TYPE:\s*(.+)z"CONFORMED PERIOD OF REPORT:\s*(.+)zSTREET 1:\s*(.+)zCITY:\s*(.+)zSTATE:\s*(.+)zZIP:\s*(.+))ZulicaZmiastoZstanZkod)Znazwa_firmyZdata_raportuZtyp_raportuZokres_raportuZadres)researchr   DOTALLgroup_find_value)r   Zheader_sectionZheader_textr   r   r   _extract_header_info   s     









z+SECHumanReadableParser._extract_header_info)textpatternr   c                 C   s"   t ||}|r|d S dS )Nr     )r!   r"   r$   strip)r   r'   r(   matchr   r   r   r%   /   s    z"SECHumanReadableParser._find_value)r'   r   c                 C   s2   t dd|}t dd|}t dd|}| S )z#Usuwa tagi HTML i formatowanie XBRLz<[^>]+> z\{.*?\}r)   z\s+)r!   subr*   )r   r'   r   r   r   _clean_html3   s    z"SECHumanReadableParser._clean_htmlc                 C   s   |   }t|}| }g }|d |d |d || t| jddd }|d| W d   n1 sz0    Y  | jS )	u"   Generuje czytelną wersję raportuzP================================================================================zRAPORT SEC - WERSJA CZYTELNAzQ================================================================================
wr   r   
N)r   r   Zcleanappendr   r   writer   )r   ZcontentZcleanerZcleaned_contentoutputr   r   r   r   generate_human_readable:   s    



.z.SECHumanReadableParser.generate_human_readableN)__name__
__module____qualname__r   strr   r   r&   r%   r.   r4   r   r   r   r   r   
   s   r   )r
   r   c                 C   s   t | }| S )z8Funkcja pomocnicza do przetwarzania pojedynczego raportu)r   r4   )r
   Zparserr   r   r   process_reportQ   s    r9   __main__z:reports/0001385849/2024-10-16_8-K_0001062993-24-017723.txtu%   Utworzono czytelną wersję raportu: )r!   Zjsonpathlibr   r   typingr   r   r   r   Zsrc.sec_cleanerr   r   r8   r9   r5   report_pathZhuman_readable_pathprintr   r   r   r   <module>   s   G